Skip to content

Commit ad50142

Browse files
Included baselines in run configuration
1 parent f4c63f8 commit ad50142

1 file changed

Lines changed: 47 additions & 4 deletions

File tree

benchmarks/benchmark.py

Lines changed: 47 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,12 @@
9191
from vcache.vcache_core.cache.eviction_policy.eviction_policy import EvictionPolicy
9292
from vcache.vcache_core.cache.eviction_policy.strategies.scu import SCUEvictionPolicy
9393
from vcache.vcache_core.similarity_evaluator import SimilarityEvaluator
94+
from vcache.vcache_core.similarity_evaluator.strategies.benchmark_comparison import (
95+
BenchmarkComparisonSimilarityEvaluator,
96+
)
97+
from vcache.vcache_core.similarity_evaluator.strategies.llm_comparison import (
98+
LLMComparisonSimilarityEvaluator,
99+
)
94100
from vcache.vcache_core.similarity_evaluator.strategies.string_comparison import (
95101
StringComparisonSimilarityEvaluator,
96102
)
@@ -246,19 +252,56 @@ class GeneratePlotsOnly(Enum):
246252
int,
247253
]
248254
] = [
255+
(
256+
EmbeddingModel.E5_LARGE_V2,
257+
LargeLanguageModel.GPT_4O_MINI,
258+
Dataset.SEM_BENCHMARK_ARENA,
259+
GeneratePlotsOnly.NO,
260+
BenchmarkComparisonSimilarityEvaluator(),
261+
SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
262+
60000,
263+
),
249264
(
250265
EmbeddingModel.GTE,
251-
LargeLanguageModel.LLAMA_3_70B_VLLM,
252-
Dataset.SEM_BENCHMARK_CLASSIFICATION,
266+
LargeLanguageModel.LLAMA_3_8B,
267+
Dataset.SEM_BENCHMARK_ARENA,
253268
GeneratePlotsOnly.NO,
254269
StringComparisonSimilarityEvaluator(),
255270
SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
256-
60000,
257-
)
271+
45000,
272+
),
273+
(
274+
EmbeddingModel.GTE,
275+
LargeLanguageModel.LLAMA_3_8B,
276+
Dataset.SEM_BENCHMARK_SEARCH_QUERIES,
277+
GeneratePlotsOnly.NO,
278+
BenchmarkComparisonSimilarityEvaluator(),
279+
SCUEvictionPolicy(max_size=160000, watermark=0.99, eviction_percentage=0.1),
280+
150000,
281+
),
282+
(
283+
EmbeddingModel.OPENAI_TEXT_EMBEDDING_SMALL,
284+
LargeLanguageModel.GPT_4_1,
285+
Dataset.CUSTOM_EXAMPLE,
286+
GeneratePlotsOnly.NO,
287+
LLMComparisonSimilarityEvaluator(
288+
inference_engine=OpenAIInferenceEngine(
289+
model_name="gpt-4.1-nano-2025-04-14", temperature=0.0
290+
)
291+
),
292+
SCUEvictionPolicy(max_size=2000, watermark=0.99, eviction_percentage=0.1),
293+
50,
294+
),
258295
]
259296

260297
BASELINES_TO_RUN: List[Baseline] = [
261298
Baseline.VCacheLocal,
299+
Baseline.IID,
300+
Baseline.GPTCache,
301+
Baseline.BerkeleyEmbedding,
302+
Baseline.SigmoidProbability,
303+
Baseline.SigmoidOnly,
304+
Baseline.VCacheBerkeleyEmbedding,
262305
]
263306

264307
STATIC_THRESHOLDS: List[float] = [0.80, 0.83, 0.86, 0.89, 0.92, 0.95, 0.97, 0.98, 0.99]

0 commit comments

Comments
 (0)