|
91 | 91 | from vcache.vcache_core.cache.eviction_policy.eviction_policy import EvictionPolicy |
92 | 92 | from vcache.vcache_core.cache.eviction_policy.strategies.scu import SCUEvictionPolicy |
93 | 93 | from vcache.vcache_core.similarity_evaluator import SimilarityEvaluator |
94 | | -from vcache.vcache_core.similarity_evaluator.strategies.benchmark_comparison import ( |
95 | | - BenchmarkComparisonSimilarityEvaluator, |
96 | | -) |
97 | 94 | from vcache.vcache_core.similarity_evaluator.strategies.string_comparison import ( |
98 | 95 | StringComparisonSimilarityEvaluator, |
99 | 96 | ) |
@@ -167,6 +164,12 @@ class LargeLanguageModel(Enum): |
167 | 164 |
|
168 | 165 | LLAMA_3_8B = ("response_llama_3_8b", "Llama_3_8B_Instruct", "float16", None) |
169 | 166 | LLAMA_3_70B = ("response_llama_3_70b", "Llama_3_70B_Instruct", "float16", None) |
| 167 | + LLAMA_3_70B_VLLM = ( |
| 168 | + "response_llama_3_70b_vllm", |
| 169 | + "Llama_3_70B_Instruct_VLLM", |
| 170 | + "float16", |
| 171 | + None, |
| 172 | + ) |
170 | 173 | GPT_4O_MINI = ("response_gpt-4o-mini", "GPT-4o-mini", "float16", None) |
171 | 174 | GPT_4O_NANO = ("response_gpt-4.1-nano", "GPT-4.1-nano", "float16", None) |
172 | 175 | GPT_4_1 = ("response_gpt-4.1", "gpt-4.1-2025-04-14", "float16", None) |
@@ -194,6 +197,7 @@ class Baseline(Enum): |
194 | 197 | IID = "iid" |
195 | 198 | SigmoidProbability = "SigmoidProbability" |
196 | 199 | SigmoidOnly = "SigmoidOnly" |
| 200 | + NoCache = "NoCache" |
197 | 201 |
|
198 | 202 |
|
199 | 203 | class Dataset(Enum): |
@@ -242,38 +246,19 @@ class GeneratePlotsOnly(Enum): |
242 | 246 | int, |
243 | 247 | ] |
244 | 248 | ] = [ |
245 | | - ( |
246 | | - EmbeddingModel.E5_LARGE_V2, |
247 | | - LargeLanguageModel.GPT_4O_MINI, |
248 | | - Dataset.SEM_BENCHMARK_ARENA, |
249 | | - GeneratePlotsOnly.YES, |
250 | | - BenchmarkComparisonSimilarityEvaluator(), |
251 | | - SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1), |
252 | | - 60000, |
253 | | - ), |
254 | 249 | ( |
255 | 250 | EmbeddingModel.GTE, |
256 | | - LargeLanguageModel.LLAMA_3_8B, |
| 251 | + LargeLanguageModel.LLAMA_3_70B_VLLM, |
257 | 252 | Dataset.SEM_BENCHMARK_CLASSIFICATION, |
258 | | - GeneratePlotsOnly.YES, |
| 253 | + GeneratePlotsOnly.NO, |
259 | 254 | StringComparisonSimilarityEvaluator(), |
260 | 255 | SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1), |
261 | | - 45000, |
262 | | - ), |
263 | | - ( |
264 | | - EmbeddingModel.GTE, |
265 | | - LargeLanguageModel.LLAMA_3_8B, |
266 | | - Dataset.SEM_BENCHMARK_SEARCH_QUERIES, |
267 | | - GeneratePlotsOnly.YES, |
268 | | - StringComparisonSimilarityEvaluator(), |
269 | | - SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1), |
270 | | - 150000, |
271 | | - ), |
| 256 | + 60000, |
| 257 | + ) |
272 | 258 | ] |
273 | 259 |
|
274 | 260 | BASELINES_TO_RUN: List[Baseline] = [ |
275 | | - Baseline.SigmoidProbability, |
276 | | - Baseline.SigmoidOnly, |
| 261 | + Baseline.VCacheLocal, |
277 | 262 | ] |
278 | 263 |
|
279 | 264 | STATIC_THRESHOLDS: List[float] = [0.80, 0.83, 0.86, 0.89, 0.92, 0.95, 0.97, 0.98, 0.99] |
|
0 commit comments