# NOTE(review): presumably the bbox diagonal length (km) beyond which an
# extent is treated as near-global; the consumer is not visible in this
# part of the file -- confirm against its usage.
NEAR_GLOBAL_DIAGONAL_KM = 15_000

# Default minimum bbox overlap ratio. Overridable through the environment
# variable of the same name; a non-numeric value raises ValueError at import.
MIN_BBOX_IOU_OVERLAP_RATIO = float(
    os.getenv("MIN_BBOX_IOU_OVERLAP_RATIO", "0.001")
)

# Names of the supported geo relations.
ALLOWED_GEO_RELATIONS = {
    "intersects",
    "within",
    "contains",
    "disjoint",
}

# Relative weights of the two bbox signals blended into the spatial score:
# containment (share of the document bbox inside the query bbox) and IoU
# (extent similarity). They are re-normalised to sum to 1.0 before use, so
# only their ratio matters.
BBOX_CONTAINMENT_WEIGHT = float(os.getenv("BBOX_CONTAINMENT_WEIGHT", "0.7"))
BBOX_IOU_WEIGHT = float(os.getenv("BBOX_IOU_WEIGHT", "0.3"))

# Strength of the spatial boost applied on top of the text relevance score:
# final = base * (1 + BBOX_SPATIAL_BOOST_WEIGHT * spatial_score).
# Negative values are clamped to 0.0 where the script params are built.
BBOX_SPATIAL_BOOST_WEIGHT = float(
    os.getenv("BBOX_SPATIAL_BOOST_WEIGHT", "0.8")
)
3841
3942
4043def _escape_query_string_brackets (query_text : str ) -> str :
@@ -346,6 +349,61 @@ def _normalize_min_overlap_ratio(raw: object) -> float:
346349 return value
347350
348351
def _normalized_spatial_weights() -> tuple[float, float]:
    """Return the (containment, IoU) blend weights scaled to sum to 1.0.

    Reads the module-level ``BBOX_CONTAINMENT_WEIGHT`` and ``BBOX_IOU_WEIGHT``
    settings. Negative weights are treated as 0.0, and the remaining pair is
    divided by its sum so callers can use the result directly as a convex
    combination.

    Returns:
        ``(containment_weight, overlap_weight)``. Falls back to ``(0.7, 0.3)``
        (the same values as the settings' built-in defaults) when the
        configuration is unusable: both weights are non-positive, either
        weight is NaN or infinite, or their sum overflows to infinity.
    """
    import math  # local import so this helper does not rely on file-level imports

    fallback = (0.7, 0.3)

    # An infinite weight would make the division below yield NaN, and that
    # NaN would then flow into every spatial score and into the scoring
    # script parameters. Treat any non-finite setting as misconfiguration.
    if not (
        math.isfinite(BBOX_CONTAINMENT_WEIGHT)
        and math.isfinite(BBOX_IOU_WEIGHT)
    ):
        return fallback

    containment_weight = max(0.0, BBOX_CONTAINMENT_WEIGHT)
    overlap_weight = max(0.0, BBOX_IOU_WEIGHT)
    total = containment_weight + overlap_weight

    # total can still overflow to inf for two huge finite weights; dividing
    # by it would silently zero out both weights.
    if total <= 0.0 or not math.isfinite(total):
        return fallback
    return containment_weight / total, overlap_weight / total
359+
360+
def _compute_bbox_spatial_metrics(
    *,
    d_minx: float,
    d_maxx: float,
    d_miny: float,
    d_maxy: float,
    q_minx: float,
    q_maxx: float,
    q_miny: float,
    q_maxy: float,
) -> dict[str, float]:
    """Score how a document bbox relates to a query bbox.

    Both boxes are treated as plain axis-aligned rectangles given by their
    numeric min/max bounds (``d_*`` for the document, ``q_*`` for the query).

    Returns:
        A dict with three values, each clamped to ``[0.0, 1.0]``:

        * ``overlap_ratio`` -- intersection area over union area (IoU).
        * ``containment_ratio`` -- intersection area over the document area,
          i.e. the share of the document bbox that lies inside the query.
        * ``spatial_score`` -- weighted blend of the two ratios, using the
          weights returned by ``_normalized_spatial_weights()``.

        All three are 0.0 when the boxes share no area or when either box
        has a non-positive area.
    """

    def _unit_clamp(value: float) -> float:
        # Pin a ratio into the closed interval [0, 1].
        return min(max(value, 0.0), 1.0)

    # Extent of the shared rectangle; a negative span means no overlap on
    # that axis and is floored at zero.
    left = max(d_minx, q_minx)
    bottom = max(d_miny, q_miny)
    right = min(d_maxx, q_maxx)
    top = min(d_maxy, q_maxy)
    shared_width = max(0.0, right - left)
    shared_height = max(0.0, top - bottom)
    shared_area = shared_width * shared_height

    document_area = max(0.0, (d_maxx - d_minx) * (d_maxy - d_miny))
    viewport_area = max(0.0, (q_maxx - q_minx) * (q_maxy - q_miny))

    # Nothing to score unless all three areas are strictly positive.
    if any(
        area <= 0.0
        for area in (shared_area, document_area, viewport_area)
    ):
        return {
            "overlap_ratio": 0.0,
            "containment_ratio": 0.0,
            "spatial_score": 0.0,
        }

    combined_area = document_area + viewport_area - shared_area
    if combined_area <= 0.0:
        iou = 0.0
    else:
        iou = shared_area / combined_area
    inside_share = shared_area / document_area

    iou = _unit_clamp(iou)
    inside_share = _unit_clamp(inside_share)

    weight_inside, weight_iou = _normalized_spatial_weights()
    blended = weight_inside * inside_share + weight_iou * iou

    return {
        "overlap_ratio": iou,
        "containment_ratio": inside_share,
        "spatial_score": _unit_clamp(blended),
    }
405+
406+
349407def _build_bbox_overlap_filter (
350408 * ,
351409 q_minx : float ,
@@ -973,14 +1031,14 @@ async def search_resources(
9731031 bool_query_dict ["must_not" ] = combined_must_not
9741032
9751033 # Base query is a plain bool; we will wrap it in script_score when we have
976- # bbox info for overlap-based relevance .
1034+ # bbox info for spatial reranking .
9771035 base_query = {"query" : {"bool" : bool_query_dict }}
9781036 overlap_context = None
9791037
980- # Add bbox overlap-based scoring when bbox filter is present.
981- # This uses an approximate IoU between the document's bbox and the query bbox,
982- # computed from numeric bbox_* fields and the query bbox bounds , and does NOT
983- # use centroids at all.
1038+ # Add bbox spatial scoring when bbox filter is present.
1039+ # This combines document containment within the query bbox and IoU
1040+ # extent similarity using numeric bbox_* fields, and does NOT use
1041+ # centroids at all.
9841042 if bbox_filter_info :
9851043 top_left = bbox_filter_info ["top_left" ]
9861044 bottom_right = bbox_filter_info ["bottom_right" ]
@@ -991,8 +1049,10 @@ async def search_resources(
9911049 q_miny = min (float (bottom_right ["lat" ]), float (top_left ["lat" ]))
9921050 q_maxy = max (float (bottom_right ["lat" ]), float (top_left ["lat" ]))
9931051
994- # Persist query bbox bounds so we can later compute a concrete
995- # bbox_overlap_ratio per hit in Python for the API meta block.
1052+ containment_weight , overlap_weight = _normalized_spatial_weights ()
1053+
1054+ # Persist query bbox bounds so we can later compute concrete bbox
1055+ # spatial metrics per hit in Python for the API meta block.
9961056 overlap_context = {
9971057 "qMinX" : q_minx ,
9981058 "qMaxX" : q_maxx ,
@@ -1068,6 +1128,15 @@ async def search_resources(
10681128 return 0.0;
10691129 }
10701130
1131+ // Prefer records whose mapped extent is mostly inside
1132+ // the user's view, while still rewarding similar extent.
1133+ double containmentRatio = intersection / docArea;
1134+ if (containmentRatio < 0.0) {
1135+ containmentRatio = 0.0;
1136+ } else if (containmentRatio > 1.0) {
1137+ containmentRatio = 1.0;
1138+ }
1139+
10711140 // Overlap similarity: IoU between document bbox and query bbox.
10721141 // This is high (near 1.0) only when the two extents are similar
10731142 // in both size and location.
@@ -1078,16 +1147,30 @@ async def search_resources(
10781147 overlapRatio = 1.0;
10791148 }
10801149
1081- // Combine base score (text relevance when present) with IoU.
1150+ double spatialScore =
1151+ (params.containmentWeight * containmentRatio) +
1152+ (params.overlapWeight * overlapRatio);
1153+
1154+ if (spatialScore < 0.0) {
1155+ spatialScore = 0.0;
1156+ } else if (spatialScore > 1.0) {
1157+ spatialScore = 1.0;
1158+ }
1159+
1160+ // Combine base text relevance with a spatial boost.
10821161 double baseScore = _score;
1083- // Keep scores positive and emphasize high-overlap maps.
1084- return baseScore * (0.1 + 0.9 * overlapRatio);
1162+ return baseScore * (
1163+ 1.0 + (params.spatialBoostWeight * spatialScore)
1164+ );
10851165 """ ,
10861166 "params" : {
10871167 "qMinX" : q_minx ,
10881168 "qMaxX" : q_maxx ,
10891169 "qMinY" : q_miny ,
10901170 "qMaxY" : q_maxy ,
1171+ "containmentWeight" : containment_weight ,
1172+ "overlapWeight" : overlap_weight ,
1173+ "spatialBoostWeight" : max (0.0 , BBOX_SPATIAL_BOOST_WEIGHT ),
10911174 },
10921175 },
10931176 }
@@ -1388,14 +1471,14 @@ async def process_search_response(
13881471 [resource ["id" ] for resource in resource_rows ]
13891472 )
13901473
1391- # Precompute lookups from id -> score and id -> bbox_overlap_ratio so we can
1392- # expose them in the API layer meta block. The ratio is computed as the
1393- # fraction of the document bbox area that lies inside the query bbox,
1394- # mirroring the Painless scoring script semantics.
1474+ # Precompute lookups from id -> score and bbox spatial metrics so we can
1475+ # expose them in the API layer meta block.
13951476 id_to_score : dict [str , float ] = {}
13961477 id_to_overlap : dict [str , float ] = {}
1478+ id_to_containment : dict [str , float ] = {}
1479+ id_to_spatial_score : dict [str , float ] = {}
13971480
1398- def _compute_overlap_ratio (hit_dict : dict , ctx : dict ) -> float | None :
1481+ def _compute_spatial_metrics (hit_dict : dict , ctx : dict ) -> dict [ str , float ] | None :
13991482 try :
14001483 src = hit_dict .get ("_source" , {})
14011484 d_minx = float (src ["bbox_minx" ])
@@ -1410,43 +1493,34 @@ def _compute_overlap_ratio(hit_dict: dict, ctx: dict) -> float | None:
14101493 q_miny = float (ctx ["qMinY" ])
14111494 q_maxy = float (ctx ["qMaxY" ])
14121495
1413- ix1 = max (d_minx , q_minx )
1414- iy1 = max (d_miny , q_miny )
1415- ix2 = min (d_maxx , q_maxx )
1416- iy2 = min (d_maxy , q_maxy )
1417-
1418- iw = max (0.0 , ix2 - ix1 )
1419- ih = max (0.0 , iy2 - iy1 )
1420- intersection = iw * ih
1421- doc_area = max (0.0 , (d_maxx - d_minx ) * (d_maxy - d_miny ))
1422- query_area = max (0.0 , (q_maxx - q_minx ) * (q_maxy - q_miny ))
1423- if intersection <= 0.0 or doc_area <= 0.0 or query_area <= 0.0 :
1424- return 0.0
1425-
1426- union_area = doc_area + query_area - intersection
1427- if union_area <= 0.0 :
1428- return 0.0
1429-
1430- ratio = intersection / union_area
1431- if ratio < 0.0 :
1432- ratio = 0.0
1433- elif ratio > 1.0 :
1434- ratio = 1.0
1435- return ratio
1496+ return _compute_bbox_spatial_metrics (
1497+ d_minx = d_minx ,
1498+ d_maxx = d_maxx ,
1499+ d_miny = d_miny ,
1500+ d_maxy = d_maxy ,
1501+ q_minx = q_minx ,
1502+ q_maxx = q_maxx ,
1503+ q_miny = q_miny ,
1504+ q_maxy = q_maxy ,
1505+ )
14361506
14371507 for hit in hits :
14381508 rid = hit ["_source" ]["id" ]
14391509 id_to_score [rid ] = hit .get ("_score" , 0.0 )
14401510 if overlap_context :
1441- ratio = _compute_overlap_ratio (hit , overlap_context )
1442- if ratio is not None :
1443- id_to_overlap [rid ] = ratio
1511+ metrics = _compute_spatial_metrics (hit , overlap_context )
1512+ if metrics is not None :
1513+ id_to_overlap [rid ] = metrics ["overlap_ratio" ]
1514+ id_to_containment [rid ] = metrics ["containment_ratio" ]
1515+ id_to_spatial_score [rid ] = metrics ["spatial_score" ]
14441516
14451517 for resource in resource_rows :
14461518 rid = resource ["id" ]
14471519 distribution_context = distribution_contexts .get (rid )
14481520 score = id_to_score .get (rid , 0.0 )
14491521 overlap_ratio = id_to_overlap .get (rid )
1522+ containment_ratio = id_to_containment .get (rid )
1523+ spatial_score = id_to_spatial_score .get (rid )
14501524
14511525 doc : dict = {
14521526 "type" : "document" ,
@@ -1459,6 +1533,10 @@ def _compute_overlap_ratio(hit_dict: dict, ctx: dict) -> float | None:
14591533 }
14601534 if overlap_ratio is not None :
14611535 doc ["bbox_overlap_ratio" ] = overlap_ratio
1536+ if containment_ratio is not None :
1537+ doc ["bbox_containment_ratio" ] = containment_ratio
1538+ if spatial_score is not None :
1539+ doc ["bbox_spatial_score" ] = spatial_score
14621540
14631541 processed_resources .append (doc )
14641542
0 commit comments