From ca7a574b6f3054917b920c43b5598d4b10516741 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sun, 10 May 2026 20:08:35 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Vectorize=20BasicEstimator.?=
 =?UTF-8?q?predict?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

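Vectorized the prediction logic in BasicEstimator using the squared
Euclidean distance expansion formula. This replaces the per-query
Python loop (one `np.linalg.norm` call per query embedding) with a
single matrix product: the amount of arithmetic is of the same order,
but the per-iteration interpreter overhead is removed.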

Key changes:
- Added `norms_sq` pre-calculation to `fit()`.
- Implemented vectorized `predict()` using `np.dot`.
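- Clamped squared distances at zero with `np.maximum` to absorb
  floating-point cancellation, and added a fallback in `predict()` that
  recomputes `norms_sq` when the attribute is missing or `None`
  (estimators created before this change).
- `predict()` now accepts a single 1-D embedding by promoting it to a
  one-row batch.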

Co-authored-by: guesswh0 <10531675+guesswh0@users.noreply.github.com>
---
 .jules/bolt.md                        |  3 ++
 face_engine/models/basic_estimator.py | 43 +++++++++++++++++++++------
 2 files changed, 37 insertions(+), 9 deletions(-)
 create mode 100644 .jules/bolt.md

diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..650c701
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2026-05-10 - [Numerical Stability in Distance Expansion]
+**Learning:** Using the expansion formula ||a-b||^2 = ||a||^2 + ||b||^2 - 2(a·b), where a·b is the dot product, for vectorized distance calculation provides significant speedup but can introduce small floating-point discrepancies (including slightly negative squared distances for identical or near-identical vectors) due to subtractive cancellation.
+**Action:** Always use `np.maximum(dists_sq, 0)` after the expansion formula and allow slightly relaxed test tolerances (e.g., `atol=1e-5`) if comparing against iterative `np.linalg.norm`.
diff --git a/face_engine/models/basic_estimator.py b/face_engine/models/basic_estimator.py
index fbbf2b9..130f673 100644
--- a/face_engine/models/basic_estimator.py
+++ b/face_engine/models/basic_estimator.py
@@ -18,23 +18,48 @@ class BasicEstimator(Estimator, name="basic"):
     def __init__(self):
         self.embeddings = None
         self.class_names = None
+        self.norms_sq = None
 
     def fit(self, embeddings, class_names, **kwargs):
-        self.embeddings = embeddings
+        self.embeddings = np.asarray(embeddings)
         self.class_names = class_names
+        # Pre-calculate squared norms for faster distance calculation
+        self.norms_sq = np.sum(self.embeddings**2, axis=1)
 
     def predict(self, embeddings):
         if self.class_names is None:
             raise TrainError("Model is not fitted yet!")
 
-        scores = []
-        class_names = []
-        for embedding in embeddings:
-            distances = np.linalg.norm(self.embeddings - embedding, axis=1)
-            index = np.argmin(distances)
-            score = np.exp(-0.5 * distances[index] ** 2)
-            scores.append(score)
-            class_names.append(self.class_names[index])
+        embeddings = np.asarray(embeddings)
+        if embeddings.ndim == 1:
+            embeddings = embeddings[np.newaxis, :]
+
+        # Using expansion formula: ||a-b||^2 = ||a||^2 + ||b||^2 - 2ab
+        # This is much faster than looping and using np.linalg.norm
+        q_norms_sq = np.sum(embeddings**2, axis=1, keepdims=True)
+
+        # Handle backward compatibility for models fitted with older versions
+        fitted_norms_sq = getattr(self, "norms_sq", None)
+        if fitted_norms_sq is None:
+            fitted_norms_sq = np.sum(self.embeddings**2, axis=1)
+
+        # Calculate squared Euclidean distances
+        dists_sq = (
+            q_norms_sq
+            + fitted_norms_sq
+            - 2 * np.dot(embeddings, self.embeddings.T)
+        )
+
+        # Numerical stability: ensure distances are non-negative
+        dists_sq = np.maximum(dists_sq, 0)
+
+        # Find best matches
+        indices = np.argmin(dists_sq, axis=1)
+        min_dists_sq = dists_sq[np.arange(len(embeddings)), indices]
+
+        scores = np.exp(-0.5 * min_dists_sq).tolist()
+        class_names = [self.class_names[i] for i in indices]
+
         return scores, class_names
 
     def save(self, dirname):