diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..1fc52b9
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,5 @@
+## 2026-05-01 - Vectorizing distance computation in BasicEstimator
+
+**Learning:** Using the squared distance expansion formula $\|a-b\|^2 = \|a\|^2 + \|b\|^2 - 2a \cdot b$ allows for full vectorization of Euclidean distance calculations using optimized BLAS routines (via `np.dot`). This is significantly faster than `np.linalg.norm` in a loop. Pre-calculating $\|b\|^2$ during the `fit` phase further optimizes the `predict` phase. However, subtractive cancellation can lead to tiny negative values for identical vectors, necessitating `np.maximum(dists_sq, 0)` before further processing (like `np.exp`).
+
+**Action:** Always prefer matrix-based distance calculations for batch operations and pre-calculate norms where possible. Use `np.maximum(..., 0)` to guard against floating-point noise in distance expansion formulas.
diff --git a/face_engine/models/basic_estimator.py b/face_engine/models/basic_estimator.py
index fbbf2b9..6b2d88e 100644
--- a/face_engine/models/basic_estimator.py
+++ b/face_engine/models/basic_estimator.py
@@ -22,19 +22,60 @@ def __init__(self):
     def fit(self, embeddings, class_names, **kwargs):
+        """Store the reference embeddings and their class names.
+
+        The squared L2 norm of every embedding is cached as well, so
+        ``predict`` only has to compute the norms of the query batch.
+        """
         self.embeddings = embeddings
         self.class_names = class_names
+        # np.asarray: accept array-likes without changing what is stored.
+        self.norms_sq = np.sum(np.square(np.asarray(embeddings)), axis=1)
 
     def predict(self, embeddings):
+        """Return the closest fitted class for each query embedding.
+
+        :param embeddings: 2-D array-like, one query vector per row.
+        :returns: ``(scores, class_names)``, two lists with one entry per
+            query. A score is ``exp(-0.5 * d ** 2)``, where ``d`` is the
+            Euclidean distance to the nearest fitted embedding.
+        :raises TrainError: if ``self.class_names`` is ``None``.
+        """
         if self.class_names is None:
             raise TrainError("Model is not fitted yet!")
 
-        scores = []
-        class_names = []
-        for embedding in embeddings:
-            distances = np.linalg.norm(self.embeddings - embedding, axis=1)
-            index = np.argmin(distances)
-            score = np.exp(-0.5 * distances[index] ** 2)
-            scores.append(score)
-            class_names.append(self.class_names[index])
+        if len(embeddings) == 0:
+            return [], []
+
+        # Nearest-neighbour search via the expansion
+        #   ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * (a . b)
+        # which handles the whole batch with one matrix product instead of
+        # a Python-level loop over the queries.
+        # np.asarray lets array-likes (e.g. a list of vectors) support the
+        # `.T` and fancy indexing used below.
+        a = np.asarray(embeddings)
+        b = np.asarray(self.embeddings)
+
+        a_norms_sq = np.sum(np.square(a), axis=1)
+        b_norms_sq = getattr(self, "norms_sq", None)
+        if b_norms_sq is None or np.shape(b_norms_sq) != (len(b),):
+            # No usable cache: the model was fitted with an older version,
+            # or the cached norms do not match the current embeddings.
+            b_norms_sq = np.sum(np.square(b), axis=1)
+
+        # dists_sq shape: (n_predict, n_fitted)
+        dists_sq = (
+            a_norms_sq[:, np.newaxis] + b_norms_sq[np.newaxis, :] - 2 * np.dot(a, b.T)
+        )
+
+        indices = np.argmin(dists_sq, axis=1)
+        min_dists_sq = dists_sq[np.arange(len(a)), indices]
+
+        # Subtractive cancellation can produce tiny negative values for
+        # (near-)identical vectors; clamp them so no score exceeds 1.
+        min_dists_sq = np.maximum(min_dists_sq, 0)
+
+        scores = np.exp(-0.5 * min_dists_sq).tolist()
+        class_names = [self.class_names[i] for i in indices]
+
         return scores, class_names
 
     def save(self, dirname):