Skip to content

Commit 302c0fd

Browse files
committed
Fix Linting Errors [2] (#63)
1 parent de4ef61 commit 302c0fd

1 file changed

Lines changed: 33 additions & 39 deletions

File tree

vcache/vcache_policy/strategies/benchmark_iid_verified.py

Lines changed: 33 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
# vCache in multi-threaded applications. This is a library-level fix.
2727
os.environ["TOKENIZERS_PARALLELISM"] = "true"
2828

29+
2930
# TODO(aditya) pull out CallbackQueue out of strategy and use uniformly across vCache baselines
3031
class CallbackQueue(queue.Queue):
3132
"""
@@ -188,7 +189,7 @@ def process_request(
188189
189190
Returns:
190191
Tuple containing [is_cache_hit, actual_response, nn_metadata_object].
191-
"""
192+
"""
192193
if self.inference_engine is None or self.cache is None:
193194
raise ValueError("Policy has not been setup")
194195

@@ -218,7 +219,6 @@ def process_request(
218219
EmbeddingMetadataObj(embedding_id=-1, response=""),
219220
)
220221

221-
222222
action = self.bayesian.select_action(
223223
similarity_score=similarity_score, metadata=nn_metadata
224224
)
@@ -420,7 +420,6 @@ def __init__(self, delta: float):
420420
self.P_c: float = 1.0 - self.delta
421421
self.epsilon_grid: np.ndarray = np.linspace(1e-6, 1 - 1e-6, 50)
422422
self.thold_grid: np.ndarray = np.linspace(0, 1, 100)
423-
424423

425424
def add_observation_to_metadata(
426425
self, similarity_score: float, is_correct: bool, metadata: EmbeddingMetadataObj
@@ -450,14 +449,14 @@ def wilson_proportion_ci(self, cdf_estimates, n, confidence):
450449
Returns:
451450
- ci_low, ci_upp : np.ndarray, lower and upper bounds of the confidence interval
452451
"""
453-
k = np.asarray((cdf_estimates * n).astype(int)) # (1, tholds,1)
454-
n = np.asarray(n) # 1
452+
k = np.asarray((cdf_estimates * n).astype(int)) # (1, tholds,1)
453+
n = np.asarray(n) # 1
455454

456455
assert np.all((0 <= k) & (k <= n)), "k must be between 0 and n"
457456
assert np.all(n > 0), "n must be > 0"
458457

459-
p_hat = k / n # (1, tholds,1)
460-
z = norm.ppf(confidence) # this is single sided # (1,1,epsilons)
458+
p_hat = k / n # (1, tholds,1)
459+
z = norm.ppf(confidence) # this is single sided # (1,1,epsilons)
461460

462461
denom = 1 + z**2 / n
463462
center = (p_hat + z**2 / (2 * n)) / denom
@@ -466,7 +465,7 @@ def wilson_proportion_ci(self, cdf_estimates, n, confidence):
466465
ci_low = center - margin
467466
ci_upp = center + margin
468467

469-
return ci_low, ci_upp #(1,tholds,epsilons)
468+
return ci_low, ci_upp # (1,tholds,epsilons)
470469

471470
def select_action(
472471
self, similarity_score: float, metadata: EmbeddingMetadataObj
@@ -486,55 +485,50 @@ def select_action(
486485

487486
if len(similarities) < 6 or len(labels) < 6:
488487
return _Action.EXPLORE
489-
num_positive_samples = np.sum(labels==1)
490-
num_negative_samples = np.sum(labels==0)
491-
488+
num_positive_samples = np.sum(labels == 1)
489+
num_negative_samples = np.sum(labels == 0)
490+
492491
# ( for vectorization , [samples, tholds, epsilon])
493-
negative_samples = similarities[labels==0].reshape(-1,1,1)
494-
labels = labels.reshape(-1,1,1)
495-
tholds = self.thold_grid.reshape(1,-1,1)
492+
negative_samples = similarities[labels == 0].reshape(-1, 1, 1)
493+
labels = labels.reshape(-1, 1, 1)
494+
tholds = self.thold_grid.reshape(1, -1, 1)
496495
deltap = (
497496
self.delta * (num_negative_samples + num_positive_samples)
498497
) / num_negative_samples
499498

500-
epsilon = self.epsilon_grid[self.epsilon_grid < deltap].reshape(1,1,-1)
501-
499+
epsilon = self.epsilon_grid[self.epsilon_grid < deltap].reshape(1, 1, -1)
500+
502501
cdf_estimate = (
503-
np.sum(negative_samples < tholds, axis=0, keepdims=True) /
504-
num_negative_samples
502+
np.sum(negative_samples < tholds, axis=0, keepdims=True)
503+
/ num_negative_samples
505504
) # (1, tholds, 1)
506505
cdf_ci_lower, cdf_ci_upper = self.wilson_proportion_ci(
507506
cdf_estimate, num_negative_samples, confidence=1 - epsilon
508507
) # (1, tholds, epsilon)
509-
508+
510509
# adjust for positive samples (1,1,epsilon)
511-
pc_adjusted = 1 - (deltap - epsilon) / (1 - epsilon)
512-
510+
pc_adjusted = 1 - (deltap - epsilon) / (1 - epsilon)
513511

514512
t_hats = (
515-
(np.sum(cdf_estimate > pc_adjusted, axis=1, keepdims=True) == 0) * 1.0
516-
+ (
517-
1 - (np.sum(cdf_estimate > pc_adjusted, axis=1, keepdims=True) == 0)
518-
)
519-
* self.thold_grid[
520-
np.argmax(cdf_estimate > pc_adjusted, axis=1, keepdims=True)
521-
]
522-
)
513+
np.sum(cdf_estimate > pc_adjusted, axis=1, keepdims=True) == 0
514+
) * 1.0 + (
515+
1 - (np.sum(cdf_estimate > pc_adjusted, axis=1, keepdims=True) == 0)
516+
) * self.thold_grid[
517+
np.argmax(cdf_estimate > pc_adjusted, axis=1, keepdims=True)
518+
]
523519
t_primes = (
524-
(np.sum(cdf_ci_lower > pc_adjusted, axis=1, keepdims=True) == 0) * 1.0
525-
+ (
526-
1 - (np.sum(cdf_ci_lower > pc_adjusted, axis=1, keepdims=True) == 0)
527-
)
528-
* self.thold_grid[
529-
np.argmax(cdf_ci_lower > pc_adjusted, axis=1, keepdims=True)
530-
]
531-
)
532-
520+
np.sum(cdf_ci_lower > pc_adjusted, axis=1, keepdims=True) == 0
521+
) * 1.0 + (
522+
1 - (np.sum(cdf_ci_lower > pc_adjusted, axis=1, keepdims=True) == 0)
523+
) * self.thold_grid[
524+
np.argmax(cdf_ci_lower > pc_adjusted, axis=1, keepdims=True)
525+
]
526+
533527
t_hat = np.min(t_hats)
534528
t_prime = np.min(t_primes)
535529
metadata.t_prime = t_prime
536530
metadata.t_hat = t_hat
537-
metadata.var_t = -1 # not computed
531+
metadata.var_t = -1 # not computed
538532

539533
if similarity_score <= t_prime:
540534
return _Action.EXPLORE

0 commit comments

Comments
 (0)