Skip to content

Commit 40454f8

Browse files
committed
Merge branch 'feature/84-overlap' into develop
2 parents c299318 + e920ed0 commit 40454f8

9 files changed

Lines changed: 199 additions & 54 deletions

File tree

backend/app/api/v1/endpoint_modules/search.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ async def _handle_search(request: Request, params: dict) -> JSONResponse:
9999
rid = None
100100
score = None
101101
overlap = None
102+
containment = None
103+
spatial_score = None
102104
if isinstance(item, dict):
103105
rid = (
104106
item.get("id")
@@ -113,8 +115,22 @@ async def _handle_search(request: Request, params: dict) -> JSONResponse:
113115
overlap = item.get("bbox_overlap_ratio")
114116
if overlap is None:
115117
overlap = item.get("attributes", {}).get("bbox_overlap_ratio")
118+
containment = item.get("bbox_containment_ratio")
119+
if containment is None:
120+
containment = item.get("attributes", {}).get("bbox_containment_ratio")
121+
spatial_score = item.get("bbox_spatial_score")
122+
if spatial_score is None:
123+
spatial_score = item.get("attributes", {}).get("bbox_spatial_score")
116124
if rid:
117-
resource_data.append({"id": rid, "score": score, "bbox_overlap_ratio": overlap})
125+
resource_data.append(
126+
{
127+
"id": rid,
128+
"score": score,
129+
"bbox_overlap_ratio": overlap,
130+
"bbox_containment_ratio": containment,
131+
"bbox_spatial_score": spatial_score,
132+
}
133+
)
118134

119135
# Step 3: Batch fetch resource data
120136
lookup = {}
@@ -161,13 +177,19 @@ async def _handle_search(request: Request, params: dict) -> JSONResponse:
161177
# from create_jsonapi_resource
162178
attrs = obj.get("attributes", {})
163179

164-
# Attach ES scoring and overlap ratio info into per-resource meta for debugging
180+
# Attach ES scoring and bbox spatial metrics into per-resource meta for debugging
165181
if rd.get("score") is not None:
166182
obj.setdefault("meta", {})
167183
obj["meta"]["score"] = rd["score"]
168184
if rd.get("bbox_overlap_ratio") is not None:
169185
obj.setdefault("meta", {})
170186
obj["meta"]["bbox_overlap_ratio"] = rd["bbox_overlap_ratio"]
187+
if rd.get("bbox_containment_ratio") is not None:
188+
obj.setdefault("meta", {})
189+
obj["meta"]["bbox_containment_ratio"] = rd["bbox_containment_ratio"]
190+
if rd.get("bbox_spatial_score") is not None:
191+
obj.setdefault("meta", {})
192+
obj["meta"]["bbox_spatial_score"] = rd["bbox_spatial_score"]
171193

172194
if isinstance(fields, str) and fields.strip():
173195
# Handle field filtering for nested attributes structure

backend/app/elasticsearch/search.py

Lines changed: 119 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
NEAR_GLOBAL_DIAGONAL_KM = 15_000
3636
MIN_BBOX_IOU_OVERLAP_RATIO = float(os.getenv("MIN_BBOX_IOU_OVERLAP_RATIO", "0.001"))
3737
ALLOWED_GEO_RELATIONS = {"intersects", "within", "contains", "disjoint"}
38+
BBOX_CONTAINMENT_WEIGHT = float(os.getenv("BBOX_CONTAINMENT_WEIGHT", "0.7"))
39+
BBOX_IOU_WEIGHT = float(os.getenv("BBOX_IOU_WEIGHT", "0.3"))
40+
BBOX_SPATIAL_BOOST_WEIGHT = float(os.getenv("BBOX_SPATIAL_BOOST_WEIGHT", "0.8"))
3841

3942

4043
def _escape_query_string_brackets(query_text: str) -> str:
@@ -346,6 +349,61 @@ def _normalize_min_overlap_ratio(raw: object) -> float:
346349
return value
347350

348351

352+
def _normalized_spatial_weights() -> tuple[float, float]:
    """Return the (containment, IoU) blend weights scaled to sum to 1.0.

    Reads the module-level ``BBOX_CONTAINMENT_WEIGHT`` and ``BBOX_IOU_WEIGHT``
    settings. Negative values are floored to 0.0 before normalizing.

    Returns:
        ``(containment_weight, overlap_weight)``. Falls back to ``(0.7, 0.3)``
        when the configured pair cannot be normalized: both weights are
        zero/negative, or their sum is not finite.
    """
    import math

    containment_weight = max(0.0, BBOX_CONTAINMENT_WEIGHT)
    overlap_weight = max(0.0, BBOX_IOU_WEIGHT)
    total = containment_weight + overlap_weight
    # float() accepts "inf" from the environment, and two huge weights can
    # overflow when summed. Dividing by a non-finite total would yield NaN
    # weights, so treat that the same as an unusable configuration.
    if not math.isfinite(total) or total <= 0.0:
        return 0.7, 0.3
    return containment_weight / total, overlap_weight / total
359+
360+
361+
def _compute_bbox_spatial_metrics(
    *,
    d_minx: float,
    d_maxx: float,
    d_miny: float,
    d_maxy: float,
    q_minx: float,
    q_maxx: float,
    q_miny: float,
    q_maxy: float,
) -> dict[str, float]:
    """Compare a document bbox (``d_*``) against a query bbox (``q_*``).

    Returns a dict with three values, each clamped to the range [0.0, 1.0]:

    * ``overlap_ratio``: intersection area divided by union area (IoU).
    * ``containment_ratio``: intersection area divided by the document area,
      i.e. the share of the document bbox that lies inside the query bbox.
    * ``spatial_score``: weighted blend of the two ratios, using the weights
      from ``_normalized_spatial_weights()``.

    All three are 0.0 when the boxes do not overlap or either box has no
    positive area.
    """

    def _unit_clamp(value: float) -> float:
        return min(max(value, 0.0), 1.0)

    # Edges of the intersection rectangle; a negative extent means the boxes
    # do not overlap on that axis and is floored to zero.
    left = max(d_minx, q_minx)
    bottom = max(d_miny, q_miny)
    right = min(d_maxx, q_maxx)
    top = min(d_maxy, q_maxy)
    shared_width = max(0.0, right - left)
    shared_height = max(0.0, top - bottom)
    shared_area = shared_width * shared_height

    area_doc = max(0.0, (d_maxx - d_minx) * (d_maxy - d_miny))
    area_query = max(0.0, (q_maxx - q_minx) * (q_maxy - q_miny))

    if shared_area <= 0.0 or area_doc <= 0.0 or area_query <= 0.0:
        return {
            "overlap_ratio": 0.0,
            "containment_ratio": 0.0,
            "spatial_score": 0.0,
        }

    area_union = area_doc + area_query - shared_area
    if area_union <= 0.0:
        iou = 0.0
    else:
        iou = shared_area / area_union
    contained = shared_area / area_doc

    iou = _unit_clamp(iou)
    contained = _unit_clamp(contained)

    weight_contained, weight_iou = _normalized_spatial_weights()
    blended = weight_contained * contained + weight_iou * iou

    return {
        "overlap_ratio": iou,
        "containment_ratio": contained,
        "spatial_score": _unit_clamp(blended),
    }
405+
406+
349407
def _build_bbox_overlap_filter(
350408
*,
351409
q_minx: float,
@@ -973,14 +1031,14 @@ async def search_resources(
9731031
bool_query_dict["must_not"] = combined_must_not
9741032

9751033
# Base query is a plain bool; we will wrap it in script_score when we have
976-
# bbox info for overlap-based relevance.
1034+
# bbox info for spatial reranking.
9771035
base_query = {"query": {"bool": bool_query_dict}}
9781036
overlap_context = None
9791037

980-
# Add bbox overlap-based scoring when bbox filter is present.
981-
# This uses an approximate IoU between the document's bbox and the query bbox,
982-
# computed from numeric bbox_* fields and the query bbox bounds, and does NOT
983-
# use centroids at all.
1038+
# Add bbox spatial scoring when bbox filter is present.
1039+
# This combines document containment within the query bbox and IoU
1040+
# extent similarity using numeric bbox_* fields, and does NOT use
1041+
# centroids at all.
9841042
if bbox_filter_info:
9851043
top_left = bbox_filter_info["top_left"]
9861044
bottom_right = bbox_filter_info["bottom_right"]
@@ -991,8 +1049,10 @@ async def search_resources(
9911049
q_miny = min(float(bottom_right["lat"]), float(top_left["lat"]))
9921050
q_maxy = max(float(bottom_right["lat"]), float(top_left["lat"]))
9931051

994-
# Persist query bbox bounds so we can later compute a concrete
995-
# bbox_overlap_ratio per hit in Python for the API meta block.
1052+
containment_weight, overlap_weight = _normalized_spatial_weights()
1053+
1054+
# Persist query bbox bounds so we can later compute concrete bbox
1055+
# spatial metrics per hit in Python for the API meta block.
9961056
overlap_context = {
9971057
"qMinX": q_minx,
9981058
"qMaxX": q_maxx,
@@ -1068,6 +1128,15 @@ async def search_resources(
10681128
return 0.0;
10691129
}
10701130
1131+
// Prefer records whose mapped extent is mostly inside
1132+
// the user's view, while still rewarding similar extent.
1133+
double containmentRatio = intersection / docArea;
1134+
if (containmentRatio < 0.0) {
1135+
containmentRatio = 0.0;
1136+
} else if (containmentRatio > 1.0) {
1137+
containmentRatio = 1.0;
1138+
}
1139+
10711140
// Overlap similarity: IoU between document bbox and query bbox.
10721141
// This is high (near 1.0) only when the two extents are similar
10731142
// in both size and location.
@@ -1078,16 +1147,30 @@ async def search_resources(
10781147
overlapRatio = 1.0;
10791148
}
10801149
1081-
// Combine base score (text relevance when present) with IoU.
1150+
double spatialScore =
1151+
(params.containmentWeight * containmentRatio) +
1152+
(params.overlapWeight * overlapRatio);
1153+
1154+
if (spatialScore < 0.0) {
1155+
spatialScore = 0.0;
1156+
} else if (spatialScore > 1.0) {
1157+
spatialScore = 1.0;
1158+
}
1159+
1160+
// Combine base text relevance with a spatial boost.
10821161
double baseScore = _score;
1083-
// Keep scores positive and emphasize high-overlap maps.
1084-
return baseScore * (0.1 + 0.9 * overlapRatio);
1162+
return baseScore * (
1163+
1.0 + (params.spatialBoostWeight * spatialScore)
1164+
);
10851165
""",
10861166
"params": {
10871167
"qMinX": q_minx,
10881168
"qMaxX": q_maxx,
10891169
"qMinY": q_miny,
10901170
"qMaxY": q_maxy,
1171+
"containmentWeight": containment_weight,
1172+
"overlapWeight": overlap_weight,
1173+
"spatialBoostWeight": max(0.0, BBOX_SPATIAL_BOOST_WEIGHT),
10911174
},
10921175
},
10931176
}
@@ -1388,14 +1471,14 @@ async def process_search_response(
13881471
[resource["id"] for resource in resource_rows]
13891472
)
13901473

1391-
# Precompute lookups from id -> score and id -> bbox_overlap_ratio so we can
1392-
# expose them in the API layer meta block. The ratio is computed as the
1393-
# fraction of the document bbox area that lies inside the query bbox,
1394-
# mirroring the Painless scoring script semantics.
1474+
# Precompute lookups from id -> score and bbox spatial metrics so we can
1475+
# expose them in the API layer meta block.
13951476
id_to_score: dict[str, float] = {}
13961477
id_to_overlap: dict[str, float] = {}
1478+
id_to_containment: dict[str, float] = {}
1479+
id_to_spatial_score: dict[str, float] = {}
13971480

1398-
def _compute_overlap_ratio(hit_dict: dict, ctx: dict) -> float | None:
1481+
def _compute_spatial_metrics(hit_dict: dict, ctx: dict) -> dict[str, float] | None:
13991482
try:
14001483
src = hit_dict.get("_source", {})
14011484
d_minx = float(src["bbox_minx"])
@@ -1410,43 +1493,34 @@ def _compute_overlap_ratio(hit_dict: dict, ctx: dict) -> float | None:
14101493
q_miny = float(ctx["qMinY"])
14111494
q_maxy = float(ctx["qMaxY"])
14121495

1413-
ix1 = max(d_minx, q_minx)
1414-
iy1 = max(d_miny, q_miny)
1415-
ix2 = min(d_maxx, q_maxx)
1416-
iy2 = min(d_maxy, q_maxy)
1417-
1418-
iw = max(0.0, ix2 - ix1)
1419-
ih = max(0.0, iy2 - iy1)
1420-
intersection = iw * ih
1421-
doc_area = max(0.0, (d_maxx - d_minx) * (d_maxy - d_miny))
1422-
query_area = max(0.0, (q_maxx - q_minx) * (q_maxy - q_miny))
1423-
if intersection <= 0.0 or doc_area <= 0.0 or query_area <= 0.0:
1424-
return 0.0
1425-
1426-
union_area = doc_area + query_area - intersection
1427-
if union_area <= 0.0:
1428-
return 0.0
1429-
1430-
ratio = intersection / union_area
1431-
if ratio < 0.0:
1432-
ratio = 0.0
1433-
elif ratio > 1.0:
1434-
ratio = 1.0
1435-
return ratio
1496+
return _compute_bbox_spatial_metrics(
1497+
d_minx=d_minx,
1498+
d_maxx=d_maxx,
1499+
d_miny=d_miny,
1500+
d_maxy=d_maxy,
1501+
q_minx=q_minx,
1502+
q_maxx=q_maxx,
1503+
q_miny=q_miny,
1504+
q_maxy=q_maxy,
1505+
)
14361506

14371507
for hit in hits:
14381508
rid = hit["_source"]["id"]
14391509
id_to_score[rid] = hit.get("_score", 0.0)
14401510
if overlap_context:
1441-
ratio = _compute_overlap_ratio(hit, overlap_context)
1442-
if ratio is not None:
1443-
id_to_overlap[rid] = ratio
1511+
metrics = _compute_spatial_metrics(hit, overlap_context)
1512+
if metrics is not None:
1513+
id_to_overlap[rid] = metrics["overlap_ratio"]
1514+
id_to_containment[rid] = metrics["containment_ratio"]
1515+
id_to_spatial_score[rid] = metrics["spatial_score"]
14441516

14451517
for resource in resource_rows:
14461518
rid = resource["id"]
14471519
distribution_context = distribution_contexts.get(rid)
14481520
score = id_to_score.get(rid, 0.0)
14491521
overlap_ratio = id_to_overlap.get(rid)
1522+
containment_ratio = id_to_containment.get(rid)
1523+
spatial_score = id_to_spatial_score.get(rid)
14501524

14511525
doc: dict = {
14521526
"type": "document",
@@ -1459,6 +1533,10 @@ def _compute_overlap_ratio(hit_dict: dict, ctx: dict) -> float | None:
14591533
}
14601534
if overlap_ratio is not None:
14611535
doc["bbox_overlap_ratio"] = overlap_ratio
1536+
if containment_ratio is not None:
1537+
doc["bbox_containment_ratio"] = containment_ratio
1538+
if spatial_score is not None:
1539+
doc["bbox_spatial_score"] = spatial_score
14621540

14631541
processed_resources.append(doc)
14641542

backend/tests/elasticsearch/test_search.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
import pytest
88

99
from app.elasticsearch.search import (
10+
BBOX_SPATIAL_BOOST_WEIGHT,
1011
MIN_BBOX_IOU_OVERLAP_RATIO,
12+
_compute_bbox_spatial_metrics,
1113
_escape_query_string_brackets,
1214
get_search_criteria,
1315
search_resources,
@@ -49,6 +51,42 @@ def test_get_search_criteria(self):
4951
assert criteria["filters"] == {"dct_spatial_sm": ["Minnesota"]}
5052
assert criteria["sort"] == [{"_score": "desc"}]
5153

54+
def test_compute_bbox_spatial_metrics_rewards_containment(self):
    """A document bbox fully inside a larger query bbox should still score well."""
    doc_bounds = {"d_minx": -93.4, "d_maxx": -93.1, "d_miny": 44.9, "d_maxy": 45.1}
    query_bounds = {"q_minx": -93.5, "q_maxx": -92.9, "q_miny": 44.8, "q_maxy": 45.2}

    result = _compute_bbox_spatial_metrics(**doc_bounds, **query_bounds)

    # The document is entirely within the query, so containment is full while
    # IoU is reduced by the larger query extent; the blended score must land
    # above the plain IoU.
    assert result["containment_ratio"] == pytest.approx(1.0)
    assert result["overlap_ratio"] < result["containment_ratio"]
    assert result["spatial_score"] > result["overlap_ratio"]
70+
71+
def test_compute_bbox_spatial_metrics_returns_zero_for_no_overlap(self):
    """Disjoint document and query bboxes must yield all-zero spatial metrics."""
    doc_bounds = {"d_minx": -100.0, "d_maxx": -99.0, "d_miny": 40.0, "d_maxy": 41.0}
    query_bounds = {"q_minx": -93.5, "q_maxx": -92.9, "q_miny": 44.8, "q_maxy": 45.2}

    result = _compute_bbox_spatial_metrics(**doc_bounds, **query_bounds)

    expected = {
        "overlap_ratio": 0.0,
        "containment_ratio": 0.0,
        "spatial_score": 0.0,
    }
    assert result == expected
89+
5290
@pytest.mark.asyncio
5391
async def test_search_resources_with_id_field_in_query_string(self):
5492
"""Test that search_resources includes id field in query_string query."""
@@ -357,6 +395,13 @@ async def test_search_resources_with_geospatial_bbox_filter(self):
357395
params = script_filter["script"]["script"]["params"]
358396
assert params["minOverlapRatio"] == MIN_BBOX_IOU_OVERLAP_RATIO
359397

398+
script_score = search_query["script_score"]["script"]
399+
assert "containmentRatio" in script_score["source"]
400+
assert "spatialScore" in script_score["source"]
401+
assert script_score["params"]["spatialBoostWeight"] == pytest.approx(
402+
BBOX_SPATIAL_BOOST_WEIGHT
403+
)
404+
360405
@pytest.mark.asyncio
361406
async def test_search_resources_with_geospatial_bbox_relation(self):
362407
"""BBox include filter should honor relation parameter."""

frontend/src/__tests__/components/GeospatialFilterMap.client.test.tsx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ describe('GeospatialFilterMap client', () => {
135135
});
136136
});
137137

138-
it('defaults bbox relation mode to within when relation is absent', async () => {
138+
it('defaults bbox relation mode to overlap when relation is absent', async () => {
139139
render(
140140
<MemoryRouter
141141
initialEntries={[
@@ -163,8 +163,8 @@ describe('GeospatialFilterMap client', () => {
163163
name: 'Set map mode to overlap',
164164
});
165165

166-
expect(withinButton).toHaveClass('bg-blue-600');
167-
expect(overlapButton).not.toHaveClass('bg-blue-600');
166+
expect(withinButton).not.toHaveClass('bg-blue-600');
167+
expect(overlapButton).toHaveClass('bg-blue-600');
168168
});
169169

170170
it('restores and persists hex layer preference via localStorage', async () => {

0 commit comments

Comments
 (0)