|
91 | 91 | from vcache.vcache_core.cache.eviction_policy.eviction_policy import EvictionPolicy |
92 | 92 | from vcache.vcache_core.cache.eviction_policy.strategies.scu import SCUEvictionPolicy |
93 | 93 | from vcache.vcache_core.similarity_evaluator import SimilarityEvaluator |
| 94 | +from vcache.vcache_core.similarity_evaluator.strategies.benchmark_comparison import ( |
| 95 | + BenchmarkComparisonSimilarityEvaluator, |
| 96 | +) |
| 97 | +from vcache.vcache_core.similarity_evaluator.strategies.llm_comparison import ( |
| 98 | + LLMComparisonSimilarityEvaluator, |
| 99 | +) |
94 | 100 | from vcache.vcache_core.similarity_evaluator.strategies.string_comparison import ( |
95 | 101 | StringComparisonSimilarityEvaluator, |
96 | 102 | ) |
@@ -246,19 +252,56 @@ class GeneratePlotsOnly(Enum): |
246 | 252 | int, |
247 | 253 | ] |
248 | 254 | ] = [ |
| 255 | + ( |
| 256 | + EmbeddingModel.E5_LARGE_V2, |
| 257 | + LargeLanguageModel.GPT_4O_MINI, |
| 258 | + Dataset.SEM_BENCHMARK_ARENA, |
| 259 | + GeneratePlotsOnly.NO, |
| 260 | + BenchmarkComparisonSimilarityEvaluator(), |
| 261 | + SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1), |
| 262 | + 60000, |
| 263 | + ), |
249 | 264 | ( |
250 | 265 | EmbeddingModel.GTE, |
251 | | - LargeLanguageModel.LLAMA_3_70B_VLLM, |
252 | | - Dataset.SEM_BENCHMARK_CLASSIFICATION, |
| 266 | + LargeLanguageModel.LLAMA_3_8B, |
| 267 | + Dataset.SEM_BENCHMARK_ARENA, |
253 | 268 | GeneratePlotsOnly.NO, |
254 | 269 | StringComparisonSimilarityEvaluator(), |
255 | 270 | SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1), |
256 | | - 60000, |
257 | | - ) |
| 271 | + 45000, |
| 272 | + ), |
| 273 | + ( |
| 274 | + EmbeddingModel.GTE, |
| 275 | + LargeLanguageModel.LLAMA_3_8B, |
| 276 | + Dataset.SEM_BENCHMARK_SEARCH_QUERIES, |
| 277 | + GeneratePlotsOnly.NO, |
| 278 | + BenchmarkComparisonSimilarityEvaluator(), |
| 279 | + SCUEvictionPolicy(max_size=160000, watermark=0.99, eviction_percentage=0.1), |
| 280 | + 150000, |
| 281 | + ), |
| 282 | + ( |
| 283 | + EmbeddingModel.OPENAI_TEXT_EMBEDDING_SMALL, |
| 284 | + LargeLanguageModel.GPT_4_1, |
| 285 | + Dataset.CUSTOM_EXAMPLE, |
| 286 | + GeneratePlotsOnly.NO, |
| 287 | + LLMComparisonSimilarityEvaluator( |
| 288 | + inference_engine=OpenAIInferenceEngine( |
| 289 | + model_name="gpt-4.1-nano-2025-04-14", temperature=0.0 |
| 290 | + ) |
| 291 | + ), |
| 292 | + SCUEvictionPolicy(max_size=2000, watermark=0.99, eviction_percentage=0.1), |
| 293 | + 50, |
| 294 | + ), |
258 | 295 | ] |
259 | 296 |
|
260 | 297 | BASELINES_TO_RUN: List[Baseline] = [ |
261 | 298 | Baseline.VCacheLocal, |
| 299 | + Baseline.IID, |
| 300 | + Baseline.GPTCache, |
| 301 | + Baseline.BerkeleyEmbedding, |
| 302 | + Baseline.SigmoidProbability, |
| 303 | + Baseline.SigmoidOnly, |
| 304 | + Baseline.VCacheBerkeleyEmbedding, |
262 | 305 | ] |
263 | 306 |
|
264 | 307 | STATIC_THRESHOLDS: List[float] = [0.80, 0.83, 0.86, 0.89, 0.92, 0.95, 0.97, 0.98, 0.99] |
|
0 commit comments