Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2025-05-10 - [Numerical Stability in Distance Expansion]
**Learning:** Using the expansion formula $\|a-b\|^2 = \|a\|^2 + \|b\|^2 - 2\,(a \cdot b)$ for vectorized distance calculation provides a significant speedup, but subtractive cancellation can introduce small floating-point discrepancies, including slightly negative squared distances.
**Action:** Always clamp the result with `np.maximum(dists_sq, 0)` after applying the expansion formula, and allow slightly relaxed test tolerances (e.g., `atol=1e-5`) when comparing against an iterative `np.linalg.norm` implementation.
43 changes: 34 additions & 9 deletions face_engine/models/basic_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,48 @@ class BasicEstimator(Estimator, name="basic"):
def __init__(self):
    """Create an unfitted estimator with all learned state unset."""
    # Targets are bound left to right: embeddings, class_names, norms_sq.
    self.embeddings = self.class_names = self.norms_sq = None

def fit(self, embeddings, class_names, **kwargs):
    """Store the reference embeddings and their labels.

    Args:
        embeddings: Array-like with one reference embedding per row. It is
            converted with ``np.asarray``; the ``axis=1`` reduction below
            requires it to be 2-D.
        class_names: Labels, indexed in parallel with the rows of
            ``embeddings``.
        **kwargs: Accepted for interface compatibility and ignored.
    """
    # Convert exactly once so later array operations can rely on an ndarray.
    self.embeddings = np.asarray(embeddings)
    self.class_names = class_names
    # Cache the per-row squared norms so distance computation does not have
    # to recompute them on every call.
    self.norms_sq = np.sum(self.embeddings ** 2, axis=1)

def predict(self, embeddings):
    """Return the nearest fitted class and a similarity score per query.

    Args:
        embeddings: A single query embedding (1-D) or a batch of query
            embeddings (2-D, one per row).

    Returns:
        A tuple ``(scores, class_names)`` of equal-length lists. For each
        query, the class name is the label of the closest fitted embedding
        and the score is ``exp(-0.5 * d ** 2)``, where ``d`` is the
        Euclidean distance to that embedding. An empty query batch yields
        ``([], [])``.

    Raises:
        TrainError: If the model has not been fitted.
    """
    if self.class_names is None:
        raise TrainError("Model is not fitted yet!")

    queries = np.asarray(embeddings)
    if queries.size == 0:
        # Nothing to classify; avoids promoting [] to a bogus (1, 0) batch.
        return [], []
    if queries.ndim == 1:
        queries = queries[np.newaxis, :]

    # Models populated without the vectorized fit() may hold a plain
    # sequence and no cached norms, so coerce and recompute as needed.
    fitted = np.asarray(self.embeddings)
    fitted_norms_sq = getattr(self, "norms_sq", None)
    if fitted_norms_sq is None:
        fitted_norms_sq = np.sum(fitted ** 2, axis=1)

    # Expansion: ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * (a . b).
    # Broadcasting (M, 1) + (N,) - (M, N) yields all pairwise distances.
    query_norms_sq = np.sum(queries ** 2, axis=1, keepdims=True)
    dists_sq = query_norms_sq + fitted_norms_sq - 2 * np.dot(queries, fitted.T)

    # Cancellation in the expansion can produce tiny negative values.
    dists_sq = np.maximum(dists_sq, 0)

    indices = np.argmin(dists_sq, axis=1)
    min_dists_sq = dists_sq.min(axis=1)

    scores = np.exp(-0.5 * min_dists_sq).tolist()
    class_names = [self.class_names[i] for i in indices]

    return scores, class_names

def save(self, dirname):
Expand Down