// Source: tok/perplexity_test.go at main · GrayCodeAI/tok · GitHub

package tok_test

import (
	"context"
	"errors"
	"strings"
	"testing"

	"github.com/GrayCodeAI/tok"
)

// TestHeuristicScorer_RanksRareAndStructuralHigher checks that the default
// heuristic scorer gives each rare or structural token a strictly higher score
// than the repeated filler word "the".
func TestHeuristicScorer_RanksRareAndStructuralHigher(t *testing.T) {
	// Four copies of "the" form the filler baseline; every other token occurs
	// exactly once.
	input := []string{
		"the", "the", "the", "the",
		"quantumEntanglement", // camelCase identifier
		"42",                  // numeric
		"config.yaml",         // path-like identifier
	}

	heuristic := tok.NewHeuristicPerplexityScorer()
	got, err := heuristic.Score(context.Background(), input)
	if err != nil {
		t.Fatalf("Score returned error: %v", err)
	}
	if want := len(input); len(got) != want {
		t.Fatalf("expected %d scores, got %d", want, len(got))
	}

	// Record the last position of every token; duplicates such as "the"
	// simply overwrite their earlier entries.
	position := make(map[string]int, len(input))
	for i := range input {
		position[input[i]] = i
	}
	baseline := got[position["the"]]

	for _, tc := range []struct {
		label string
		word  string
	}{
		{label: "camelCase identifier", word: "quantumEntanglement"},
		{label: "numeric", word: "42"},
		{label: "path identifier", word: "config.yaml"},
	} {
		t.Run(tc.label, func(t *testing.T) {
			score := got[position[tc.word]]
			if score <= baseline {
				t.Errorf("expected %q (%.3f) to score higher than common 'the' (%.3f)",
					tc.word, score, baseline)
			}
		})
	}
}

// TestHeuristicScorer_Empty checks that scoring a nil token slice produces
// neither an error nor any scores.
func TestHeuristicScorer_Empty(t *testing.T) {
	got, err := tok.NewHeuristicPerplexityScorer().Score(context.Background(), nil)
	switch {
	case err != nil:
		t.Fatalf("unexpected error: %v", err)
	case len(got) != 0:
		t.Fatalf("expected no scores, got %d", len(got))
	}
}

// TestScorerFunc_Adapter checks that a plain function wrapped in
// tok.ScorerFunc can be used as a tok.PerplexityScorer: calling Score must
// invoke the wrapped function and hand back one score per token.
func TestScorerFunc_Adapter(t *testing.T) {
	invoked := false

	// uniform flags that it ran and scores every token as 1.0.
	uniform := func(_ context.Context, tokens []string) ([]float64, error) {
		invoked = true
		ones := make([]float64, 0, len(tokens))
		for range tokens {
			ones = append(ones, 1.0)
		}
		return ones, nil
	}

	var scorer tok.PerplexityScorer = tok.ScorerFunc(uniform)

	got, err := scorer.Score(context.Background(), []string{"a", "b"})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !invoked {
		t.Error("expected ScorerFunc to be invoked")
	}
	if n := len(got); n != 2 {
		t.Fatalf("expected 2 scores, got %d", n)
	}
}

// TestCompressPerplexityGuided_ReducesTokens checks that the drop path shrinks
// the whitespace-separated word count by approximately dropRatio and that the
// returned stats report a positive saving.
func TestCompressPerplexityGuided_ReducesTokens(t *testing.T) {
	// Mostly repeated filler, with a handful of rare tokens mixed in.
	sentences := []string{
		"the cat sat on the mat and the cat ran to the box",
		"the dog sat on the log and the dog ran to the den",
		"a rare quantumEntanglement token appears here exactly once",
	}
	in := strings.Join(sentences, " ")

	// Allowed absolute deviation between requested and observed drop ratio.
	const tolerance = 0.15

	for _, tc := range []struct {
		name      string
		dropRatio float64
	}{
		{name: "drop 30%", dropRatio: 0.3},
		{name: "drop 50%", dropRatio: 0.5},
	} {
		t.Run(tc.name, func(t *testing.T) {
			before := len(strings.Fields(in))
			out, stats := tok.CompressPerplexityGuided(in, tok.NewHeuristicPerplexityScorer(), tc.dropRatio)
			after := len(strings.Fields(out))

			if after >= before {
				t.Fatalf("expected fewer tokens, got %d >= %d", after, before)
			}

			// The observed word-level drop must land inside the tolerance band.
			actualDrop := 1.0 - float64(after)/float64(before)
			lower, upper := tc.dropRatio-tolerance, tc.dropRatio+tolerance
			if actualDrop < lower || actualDrop > upper {
				t.Errorf("expected word drop ~%.2f, got %.2f (%d -> %d words)",
					tc.dropRatio, actualDrop, before, after)
			}

			if stats.TokensSaved <= 0 {
				t.Errorf("expected positive TokensSaved, got %d", stats.TokensSaved)
			}
			if stats.FinalTokens >= stats.OriginalTokens {
				t.Errorf("expected FinalTokens < OriginalTokens, got %d >= %d",
					stats.FinalTokens, stats.OriginalTokens)
			}
		})
	}
}

// TestCompressPerplexityGuided_KeepsHighImportance checks that the rare
// identifier and the number embedded in a run of "the" filler are still present
// in the output after a 50% drop.
func TestCompressPerplexityGuided_KeepsHighImportance(t *testing.T) {
	in := "the the the the the the the the importantIdentifier the the the the the the the the 9876 the the"
	out, _ := tok.CompressPerplexityGuided(in, tok.NewHeuristicPerplexityScorer(), 0.5)

	// Each entry pairs a substring that must survive with the failure message
	// reported when it is missing.
	required := []struct {
		substr string
		format string
	}{
		{"importantIdentifier", "expected high-importance identifier preserved, got %q"},
		{"9876", "expected high-importance number preserved, got %q"},
	}
	for _, want := range required {
		if strings.Contains(out, want.substr) {
			continue
		}
		t.Errorf(want.format, out)
	}
}

// TestCompressPerplexityGuided_PreservesProtectedSymbols checks that the "."
// sentence boundary is still present in the output after a 60% drop.
func TestCompressPerplexityGuided_PreservesProtectedSymbols(t *testing.T) {
	const boundary = "."

	in := "alpha beta gamma delta epsilon zeta eta theta . the the the the the the the the"
	out, _ := tok.CompressPerplexityGuided(in, tok.NewHeuristicPerplexityScorer(), 0.6)

	if strings.Contains(out, boundary) {
		return
	}
	t.Errorf("expected protected '.' boundary preserved, got %q", out)
}

// TestCompressPerplexityGuided_ShortInputUnchanged checks that a three-word
// input is returned verbatim and that the stats report zero tokens saved.
func TestCompressPerplexityGuided_ShortInputUnchanged(t *testing.T) {
	const in = "one two three"

	out, stats := tok.CompressPerplexityGuided(in, nil, 0.5)

	if out != in {
		t.Errorf("expected short input unchanged, got %q", out)
	}
	if saved := stats.TokensSaved; saved != 0 {
		t.Errorf("expected zero savings on short input, got %d", saved)
	}
}

// TestCompressPerplexityGuided_ZeroRatioNoop checks that a dropRatio of 0
// returns the input string unmodified.
func TestCompressPerplexityGuided_ZeroRatioNoop(t *testing.T) {
	const in = "the cat sat on the mat and the cat ran to the box quickly today"

	out, _ := tok.CompressPerplexityGuided(in, nil, 0)
	if out == in {
		return
	}
	t.Errorf("expected dropRatio=0 to be a no-op, got %q", out)
}

// TestCompressPerplexityGuided_NilScorerUsesHeuristic checks that passing a nil
// scorer still compresses: the output must have fewer words than the input and
// the stats must report a positive saving.
func TestCompressPerplexityGuided_NilScorerUsesHeuristic(t *testing.T) {
	in := "the cat sat on the mat and the cat ran to the box quickly today now"

	out, stats := tok.CompressPerplexityGuided(in, nil, 0.4)

	wordsOut, wordsIn := len(strings.Fields(out)), len(strings.Fields(in))
	if wordsOut >= wordsIn {
		t.Errorf("expected nil scorer to default to heuristic and reduce tokens, got %q", out)
	}
	if saved := stats.TokensSaved; saved <= 0 {
		t.Errorf("expected positive savings, got %d", saved)
	}
}

// TestCompressPerplexityGuided_ScorerErrorFailsOpen checks the fail-open
// behavior: when the scorer returns an error, the input must come back
// unchanged and the stats must report zero tokens saved.
func TestCompressPerplexityGuided_ScorerErrorFailsOpen(t *testing.T) {
	in := "the cat sat on the mat and the cat ran to the box quickly today now"

	// A scorer that always fails, regardless of its arguments.
	var failing tok.ScorerFunc = func(context.Context, []string) ([]float64, error) {
		return nil, errors.New("boom")
	}

	out, stats := tok.CompressPerplexityGuided(in, failing, 0.5)

	if out != in {
		t.Errorf("expected input unchanged on scorer error, got %q", out)
	}
	if saved := stats.TokensSaved; saved != 0 {
		t.Errorf("expected zero savings on scorer error, got %d", saved)
	}
}

// TestWithPerplexityGuided_OptionPath checks that the WithPerplexityGuided
// option plugs into tok.Compress and yields a smaller result than a plain
// Compress call: fewer final tokens, fewer words and more tokens saved.
func TestWithPerplexityGuided_OptionPath(t *testing.T) {
	sentences := []string{
		"the system processed the request and the system returned the result",
		"the user clicked the button and the page loaded the content again",
		"a rare distinctiveToken appeared in the otherwise repetitive content here",
	}
	in := strings.Join(sentences, " ")

	// Run the plain pipeline first, then the same input with the option.
	baseline, baseStats := tok.Compress(in)
	option := tok.WithPerplexityGuided(tok.NewHeuristicPerplexityScorer(), 0.4)
	guided, guidedStats := tok.Compress(in, option)

	if guidedStats.FinalTokens >= baseStats.FinalTokens {
		t.Errorf("expected perplexity-guided to drop more (%d) than baseline (%d)",
			guidedStats.FinalTokens, baseStats.FinalTokens)
	}

	guidedWords := len(strings.Fields(guided))
	baselineWords := len(strings.Fields(baseline))
	if guidedWords >= baselineWords {
		t.Errorf("expected guided output to have fewer words than baseline")
	}

	if guidedStats.TokensSaved <= baseStats.TokensSaved {
		t.Errorf("expected guided TokensSaved (%d) > baseline (%d)",
			guidedStats.TokensSaved, baseStats.TokensSaved)
	}
}

// TestWithPerplexityGuided_NilScorerOption checks that tok.Compress accepts a
// WithPerplexityGuided option built with a nil scorer and still returns
// non-empty output.
func TestWithPerplexityGuided_NilScorerOption(t *testing.T) {
	in := "the cat sat on the mat and the cat ran to the box quickly today now"

	// The nil scorer is expected to fall back to the heuristic rather than panic.
	option := tok.WithPerplexityGuided(nil, 0.4)
	if out, _ := tok.Compress(in, option); out == "" {
		t.Fatal("expected non-empty output")
	}
}