Skip to content

Commit 7bc7610

Browse files
committed
feat(mcp): add input sanitization and test coverage
MCP-SAN (#49): Input sanitization for the MCP server layer. - Add sanitize package: Content (Markdown structure injection), Reflect (truncate + strip control chars for error messages), SessionID (path-safe session identifiers), StripControl, Truncate - Sanitize all reflected user inputs in dispatch error messages (tool names, prompt names, resource URIs) via sanitize.Reflect - Reject unknown entry types before writing to .context/ files - Enforce MaxContentLen (32KB) on entry content in extract.EntryArgs - Sanitize entry content and optional fields via sanitize.Content and extract.SanitizedOpts before writing - Cap journal source limit to MaxSourceLimit (100) - Sanitize caller identifiers in session events - Add input length constants to config/mcp/cfg - Add error message keys for input-too-long and unknown-entry-type MCP-COV (#50): Comprehensive test coverage for MCP subsystem. - internal/mcp/proto: 22 schema round-trip and edge-case tests - internal/mcp/session: 7 state lifecycle tests (100% coverage) - internal/mcp/server: 4 integration tests (Serve edge cases, prompt add-learning) - internal/mcp/server/def/tool: 9 tool definition tests - internal/mcp/server/def/prompt: 9 prompt definition tests - internal/mcp/server/extract: 7 extraction and sanitization tests - internal/mcp/server/io: 3 WriteJSON tests (100% coverage) - internal/mcp/server/out: 8 response builder tests (100% coverage) - internal/mcp/server/parse: 3 request parsing tests (100% coverage) - internal/mcp/server/stat: 2 statistics tests (100% coverage) - internal/sanitize: 22 sanitization tests (Content, Reflect, SessionID, StripControl, Truncate + existing Filename) - Server package coverage: 73% -> 92% Closes #49 Closes #50 Signed-off-by: CoderMungan <codermungan@gmail.com>
1 parent 6554515 commit 7bc7610

28 files changed

Lines changed: 1840 additions & 13 deletions

File tree

internal/assets/commands/text/mcp.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,10 @@ mcp.err-unknown-prompt:
348348
short: 'unknown prompt: %s'
349349
mcp.err-uri-required:
350350
short: uri is required
351+
mcp.err-input-too-long:
352+
short: '%s exceeds maximum length (%d bytes)'
353+
mcp.err-unknown-entry-type:
354+
short: 'unknown entry type: %s'
351355
mcp.format-watch-completed:
352356
short: 'Completed: %s'
353357
mcp.format-wrote:

internal/config/embed/text/mcp_err.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,22 @@ const (
3030
// DescKeyMCPErrTypeContentRequired is the text key for mcp err type content
3131
// required messages.
3232
DescKeyMCPErrTypeContentRequired = "mcp.err-type-content-required"
33-
// DescKeyMCPErrQueryRequired is the text key for mcp err query required
34-
// messages.
33+
// DescKeyMCPErrQueryRequired is the text key for mcp err
34+
// query required messages.
3535
DescKeyMCPErrQueryRequired = "mcp.err-query-required"
36-
// DescKeyMCPErrSearchRead is the text key for mcp err search read messages.
36+
// DescKeyMCPErrSearchRead is the text key for mcp err
37+
// search read messages.
3738
DescKeyMCPErrSearchRead = "mcp.err-search-read"
38-
// DescKeyMCPErrUnknownPrompt is the text key for mcp err unknown prompt
39-
// messages.
39+
// DescKeyMCPErrUnknownPrompt is the text key for mcp err
40+
// unknown prompt messages.
4041
DescKeyMCPErrUnknownPrompt = "mcp.err-unknown-prompt"
41-
// DescKeyMCPErrURIRequired is the text key for mcp err uri required messages.
42+
// DescKeyMCPErrURIRequired is the text key for mcp err
43+
// uri required messages.
4244
DescKeyMCPErrURIRequired = "mcp.err-uri-required"
45+
// DescKeyMCPErrInputTooLong is the text key for mcp err
46+
// input too long messages.
47+
DescKeyMCPErrInputTooLong = "mcp.err-input-too-long"
48+
// DescKeyMCPErrUnknownEntryType is the text key for mcp
49+
// err unknown entry type messages.
50+
DescKeyMCPErrUnknownEntryType = "mcp.err-unknown-entry-type"
4351
)

internal/config/mcp/cfg/config.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,23 @@ const (
1313

1414
// DefaultSourceLimit is the max sessions returned by ctx_journal_source.
1515
DefaultSourceLimit = 5
16+
// MaxSourceLimit caps the source limit to prevent unbounded queries.
17+
MaxSourceLimit = 100
1618
// MinWordLen is the shortest word considered for overlap matching.
1719
MinWordLen = 4
1820
// MinWordOverlap is the minimum word matches to signal task completion.
1921
MinWordOverlap = 2
22+
23+
// --- Input length limits (MCP-SAN.1) ---
24+
25+
// MaxContentLen is the maximum byte length for entry content fields.
26+
MaxContentLen = 32_000
27+
// MaxNameLen is the maximum byte length for tool/prompt/resource names.
28+
MaxNameLen = 256
29+
// MaxQueryLen is the maximum byte length for search queries.
30+
MaxQueryLen = 1_000
31+
// MaxCallerLen is the maximum byte length for caller identifiers.
32+
MaxCallerLen = 128
33+
// MaxURILen is the maximum byte length for resource URIs.
34+
MaxURILen = 512
2035
)

internal/config/regex/sanitize.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// / ctx: https://ctx.ist
2+
// ,'`./ do you remember?
3+
// `.,'\
4+
// \ Copyright 2026-present Context contributors.
5+
// SPDX-License-Identifier: Apache-2.0
6+
7+
package regex
8+
9+
import "regexp"
10+
11+
// SanEntryHeader matches entry headers like "## [2026-" in
12+
// content sanitization (MCP-SAN.3).
13+
var SanEntryHeader = regexp.MustCompile(
14+
`(?m)^##\s+\[\d{4}-`,
15+
)
16+
17+
// SanTaskCheckbox matches task checkboxes "- [ ]" and
18+
// "- [x]" in content sanitization.
19+
var SanTaskCheckbox = regexp.MustCompile(
20+
`(?m)^-\s+\[[x ]\]`,
21+
)
22+
23+
// SanConstitutionRule matches constitution rule format
24+
// "- [ ] **Never" in content sanitization.
25+
var SanConstitutionRule = regexp.MustCompile(
26+
`(?m)^-\s+\[[x ]\]\s+\*\*[A-Z]`,
27+
)
28+
29+
// SanSessionIDUnsafe matches characters not safe for session
30+
// IDs in file paths: anything outside [a-zA-Z0-9._-].
31+
var SanSessionIDUnsafe = regexp.MustCompile(
32+
`[^a-zA-Z0-9._-]`,
33+
)

internal/config/sanitize/doc.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// / ctx: https://ctx.ist
2+
// ,'`./ do you remember?
3+
// `.,'\
4+
// \ Copyright 2026-present Context contributors.
5+
// SPDX-License-Identifier: Apache-2.0
6+
7+
// Package sanitize defines string and length constants used by
8+
// the sanitize layer.
9+
//
10+
// Constants are referenced by internal/sanitize via config/sanitize.*.
11+
// Provides: [NullByte], [DotDot], [ForwardSlash], [Backslash],
12+
// [HyphenReplace], [EscapePrefix], [MaxSessionIDLen].
13+
package sanitize
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// / ctx: https://ctx.ist
2+
// ,'`./ do you remember?
3+
// `.,'\
4+
// \ Copyright 2026-present Context contributors.
5+
// SPDX-License-Identifier: Apache-2.0
6+
7+
package sanitize
8+
9+
// Sanitize-layer string and length constants.
10+
const (
11+
// NullByte is the null character stripped from untrusted input.
12+
NullByte = "\x00"
13+
14+
// DotDot is a path traversal sequence.
15+
DotDot = ".."
16+
17+
// ForwardSlash is the forward slash stripped from session IDs.
18+
ForwardSlash = "/"
19+
20+
// Backslash is the backslash stripped from session IDs.
21+
Backslash = "\\"
22+
23+
// HyphenReplace is the replacement character for unsafe
24+
// session ID characters.
25+
HyphenReplace = "-"
26+
27+
// EscapePrefix is the backslash prefix for escaping Markdown
28+
// structural patterns.
29+
EscapePrefix = `\`
30+
31+
// MaxSessionIDLen is the maximum byte length for a session
32+
// identifier.
33+
MaxSessionIDLen = 128
34+
)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// / ctx: https://ctx.ist
2+
// ,'`./ do you remember?
3+
// `.,'\
4+
// \ Copyright 2026-present Context contributors.
5+
// SPDX-License-Identifier: Apache-2.0
6+
7+
package entity
8+
9+
import (
10+
"testing"
11+
"time"
12+
)
13+
14+
func TestNewMCPSession(t *testing.T) {
15+
s := NewMCPSession()
16+
if s.ToolCalls != 0 {
17+
t.Errorf("ToolCalls = %d, want 0", s.ToolCalls)
18+
}
19+
if s.AddsPerformed == nil {
20+
t.Fatal("AddsPerformed should be initialized")
21+
}
22+
if len(s.AddsPerformed) != 0 {
23+
t.Errorf(
24+
"AddsPerformed length = %d, want 0",
25+
len(s.AddsPerformed),
26+
)
27+
}
28+
if s.SessionStartedAt.IsZero() {
29+
t.Error("SessionStartedAt should be set")
30+
}
31+
if len(s.PendingFlush) != 0 {
32+
t.Errorf(
33+
"PendingFlush length = %d, want 0",
34+
len(s.PendingFlush),
35+
)
36+
}
37+
}
38+
39+
func TestRecordToolCall(t *testing.T) {
40+
s := NewMCPSession()
41+
s.RecordToolCall()
42+
if s.ToolCalls != 1 {
43+
t.Errorf("ToolCalls = %d, want 1", s.ToolCalls)
44+
}
45+
s.RecordToolCall()
46+
s.RecordToolCall()
47+
if s.ToolCalls != 3 {
48+
t.Errorf("ToolCalls = %d, want 3", s.ToolCalls)
49+
}
50+
}
51+
52+
func TestRecordAdd(t *testing.T) {
53+
s := NewMCPSession()
54+
s.RecordAdd("task")
55+
s.RecordAdd("task")
56+
s.RecordAdd("decision")
57+
if s.AddsPerformed["task"] != 2 {
58+
t.Errorf(
59+
"task adds = %d, want 2",
60+
s.AddsPerformed["task"],
61+
)
62+
}
63+
if s.AddsPerformed["decision"] != 1 {
64+
t.Errorf(
65+
"decision adds = %d, want 1",
66+
s.AddsPerformed["decision"],
67+
)
68+
}
69+
}
70+
71+
func TestQueuePendingUpdate(t *testing.T) {
72+
s := NewMCPSession()
73+
now := time.Now()
74+
s.QueuePendingUpdate(PendingUpdate{
75+
Type: "task",
76+
Content: "Build feature",
77+
QueuedAt: now,
78+
})
79+
if len(s.PendingFlush) != 1 {
80+
t.Fatalf(
81+
"PendingFlush length = %d, want 1",
82+
len(s.PendingFlush),
83+
)
84+
}
85+
pu := s.PendingFlush[0]
86+
if pu.Type != "task" {
87+
t.Errorf(
88+
"Type = %q, want %q",
89+
pu.Type, "task",
90+
)
91+
}
92+
if pu.Content != "Build feature" {
93+
t.Errorf(
94+
"Content = %q, want %q",
95+
pu.Content, "Build feature",
96+
)
97+
}
98+
}

internal/err/mcp/mcp.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,19 @@ func UnknownEventType(eventType string) error {
6565
eventType,
6666
)
6767
}
68+
69+
// InputTooLong returns an error when input exceeds the allowed
70+
// length.
71+
//
72+
// Parameters:
73+
// - field: the field name that is too long
74+
// - maxLen: the maximum allowed length in bytes
75+
//
76+
// Returns:
77+
// - error: "<field> exceeds maximum length (<maxLen> bytes)"
78+
func InputTooLong(field string, maxLen int) error {
79+
return fmt.Errorf(
80+
desc.Text(text.DescKeyMCPErrInputTooLong),
81+
field, maxLen,
82+
)
83+
}

0 commit comments

Comments
 (0)