From eb2f14c92ec5541766743130e9452c28fd3cdad6 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sun, 17 May 2026 20:08:06 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Vectorized=20face=20predict?=
 =?UTF-8?q?ion=20and=20detection=20optimization?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Vectorize BasicEstimator.predict using squared distance expansion formula.
- Pre-calculate squared norms in fit to avoid redundant computation.
- Vectorize bounding box area calculation in FaceEngine.find_faces.
- Fix extra metadata indexing in find_faces: select entries by the kept detection indices instead of the single element at position `limit`.
- Add backward compatibility for loaded models.
- Add guard clause for empty input embeddings.

Co-authored-by: guesswh0 <10531675+guesswh0@users.noreply.github.com>
---
 .jules/bolt.md                        |  3 ++
 face_engine/core.py                   | 13 +++++---
 face_engine/models/basic_estimator.py | 45 +++++++++++++++++++++------
 3 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 .jules/bolt.md

diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..5a032ca
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2025-05-15 - Vectorized Distance Expansion Formula Numerical Stability
+**Learning:** Computing pairwise distances with the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2(a·b) gives a significant speedup (the cross term becomes a single BLAS-backed matrix product), but subtractive cancellation can introduce small floating-point errors, including slightly negative squared distances when a and b are nearly identical.
+**Action:** Always clamp the result with `np.maximum(dists_sq, 0)` when using the expansion formula, and allow slightly relaxed test tolerances (e.g., `rtol=1e-4`) when comparing against `np.linalg.norm`. Skip the square root entirely when the downstream formula (such as a Radial Basis Function kernel) consumes the squared distance directly.
diff --git a/face_engine/core.py b/face_engine/core.py
index 582c981..88ac951 100644
--- a/face_engine/core.py
+++ b/face_engine/core.py
@@ -364,14 +364,17 @@ def find_faces(
         n_det = len(bbs)
         if isinstance(limit, int) and limit < n_det:
             if self.detector in ["hog", "mmod"]:
-                indices = range(limit)
+                indices = np.arange(limit)
             else:
-                indices = np.argsort([(bb[2] - bb[0]) * (bb[3] - bb[1]) for bb in bbs])[
-                    ::-1
-                ][:limit]
+                # Vectorized bounding box area calculation
+                areas = (bbs[:, 2] - bbs[:, 0]) * (bbs[:, 3] - bbs[:, 1])
+                indices = np.argsort(areas)[::-1][:limit]
             # limit extra fields if any exist
             for key, value in extra.items():
-                extra[key] = extra[key][limit]
+                if isinstance(value, np.ndarray):
+                    extra[key] = value[indices]
+                elif isinstance(value, list):
+                    extra[key] = [value[i] for i in indices]
             bbs = bbs[indices]
 
         if normalize:
diff --git a/face_engine/models/basic_estimator.py b/face_engine/models/basic_estimator.py
index fbbf2b9..c9d5205 100644
--- a/face_engine/models/basic_estimator.py
+++ b/face_engine/models/basic_estimator.py
@@ -18,24 +18,51 @@ class BasicEstimator(Estimator, name="basic"):
     def __init__(self):
         self.embeddings = None
         self.class_names = None
+        self.fitted_norms_sq = None
 
     def fit(self, embeddings, class_names, **kwargs):
         self.embeddings = embeddings
         self.class_names = class_names
+        # Pre-calculate squared norms for faster distance computation
+        self.fitted_norms_sq = np.sum(self.embeddings**2, axis=1)
 
     def predict(self, embeddings):
         if self.class_names is None:
             raise TrainError("Model is not fitted yet!")
 
-        scores = []
-        class_names = []
-        for embedding in embeddings:
-            distances = np.linalg.norm(self.embeddings - embedding, axis=1)
-            index = np.argmin(distances)
-            score = np.exp(-0.5 * distances[index] ** 2)
-            scores.append(score)
-            class_names.append(self.class_names[index])
-        return scores, class_names
+        # Ensure embeddings are a numpy array
+        embeddings = np.asarray(embeddings)
+        if embeddings.size == 0:
+            return [], []
+
+        # Vectorized distance calculation using the expansion formula:
+        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * dot(a, b)
+        # Backward compatibility: recompute fitted_norms_sq if the attribute is missing or None
+        if not hasattr(self, "fitted_norms_sq") or self.fitted_norms_sq is None:
+            self.fitted_norms_sq = np.sum(self.embeddings**2, axis=1)
+
+        input_norms_sq = np.sum(embeddings**2, axis=1)
+        dot_product = np.dot(embeddings, self.embeddings.T)
+
+        # dists_sq has shape (n_input, n_fitted)
+        dists_sq = (
+            input_norms_sq[:, np.newaxis] + self.fitted_norms_sq[np.newaxis, :] - 2 * dot_product
+        )
+
+        # Numerical stability: clamp rounding-induced negatives; squared distances are >= 0
+        dists_sq = np.maximum(dists_sq, 0)
+
+        # Find the index of the minimum distance for each input embedding
+        indices = np.argmin(dists_sq, axis=1)
+
+        # Radial Basis Function (RBF) kernel as a confidence score
+        # Using dists_sq directly to avoid redundant square root calculations
+        scores = np.exp(-0.5 * dists_sq[np.arange(len(indices)), indices])
+
+        # Map indices to class names
+        class_names = [self.class_names[i] for i in indices]
+
+        return scores.tolist(), class_names
 
     def save(self, dirname):
         name = "%s.estimator.%s" % (self.name, "p")