1+ import json
12import logging
23import os
34from typing import Optional
@@ -330,10 +331,24 @@ async def search_wof(
330331 q : str = Query (..., description = "Search query" ),
331332 limit : int = Query (10 , description = "Maximum number of results" , ge = 1 , le = 100 ),
332333 offset : int = Query (0 , description = "Number of results to skip" , ge = 0 ),
334+ exclude_placetypes : Optional [str ] = Query (
335+ None ,
336+ description = "Comma-separated list of placetypes to exclude (default: localadmin,microhood,neighbourhood,venue)" ,
337+ ),
333338 request : Request = None ,
334339):
335340 """Search Who's on First gazetteer."""
336341 try :
342+ # Default placetypes to exclude for autosuggestion
343+ if exclude_placetypes is None :
344+ exclude_placetypes = "localadmin,microhood,neighbourhood,venue"
345+ # Handle case where exclude_placetypes might be a Query object (when called directly in tests)
346+ elif not isinstance (exclude_placetypes , str ):
347+ # If it's not a string (e.g., Query object), use default
348+ exclude_placetypes = "localadmin,microhood,neighbourhood,venue"
349+
350+ excluded_types = [pt .strip () for pt in exclude_placetypes .split ("," ) if pt .strip ()]
351+
337352 # Build search query
338353 search_terms = q .split ()
339354 conditions = []
@@ -346,6 +361,16 @@ async def search_wof(
346361 )
347362 )
348363
364+ # Exclude confusing placetypes for autosuggestion
365+ # Keep records where placetype is NULL or not in excluded list
366+ if excluded_types :
367+ conditions .append (
368+ or_ (
369+ gazetteer_wof_spr .c .placetype .is_ (None ),
370+ ~ gazetteer_wof_spr .c .placetype .in_ (excluded_types ),
371+ )
372+ )
373+
349374 query = (
350375 select (gazetteer_wof_spr )
351376 .where (and_ (* conditions ))
@@ -356,13 +381,121 @@ async def search_wof(
356381
357382 results = await database .fetch_all (query )
358383
384+ # Collect wok_ids for batch fetching ancestors and geojson
385+ wok_ids = [result ["wok_id" ] for result in results ]
386+
387+ # Fetch ancestors for all results in batch
388+ ancestors_map = {}
389+ if wok_ids :
390+ ancestors_query = select (gazetteer_wof_ancestors ).where (
391+ gazetteer_wof_ancestors .c .wok_id .in_ (wok_ids )
392+ )
393+ ancestors = await database .fetch_all (ancestors_query )
394+
395+ # Group ancestors by wok_id
396+ for ancestor in ancestors :
397+ wok_id = ancestor ["wok_id" ]
398+ if wok_id not in ancestors_map :
399+ ancestors_map [wok_id ] = []
400+ ancestors_map [wok_id ].append (dict (ancestor ))
401+
402+ # Fetch ancestor names from spr table
403+ ancestor_ids = list (
404+ set (
405+ [
406+ a ["ancestor_id" ]
407+ for ancestors_list in ancestors_map .values ()
408+ for a in ancestors_list
409+ ]
410+ )
411+ )
412+ if ancestor_ids :
413+ # Compare ancestor_id (Integer) with wok_id (BigInteger) - PostgreSQL handles type coercion
414+ ancestor_spr_query = select (gazetteer_wof_spr ).where (
415+ gazetteer_wof_spr .c .wok_id .in_ (ancestor_ids )
416+ )
417+ ancestor_sprs = await database .fetch_all (ancestor_spr_query )
418+ ancestor_names_map = {spr ["wok_id" ]: spr ["name" ] for spr in ancestor_sprs }
419+
420+ # Add names to ancestors
421+ for wok_id , ancestors_list in ancestors_map .items ():
422+ for ancestor in ancestors_list :
423+ ancestor ["name" ] = ancestor_names_map .get (ancestor ["ancestor_id" ])
424+
425+ # Fetch GeoJSON for all results in batch
426+ geojson_map = {}
427+ if wok_ids :
428+ geojson_query = (
429+ select (gazetteer_wof_geojson )
430+ .where (gazetteer_wof_geojson .c .wok_id .in_ (wok_ids ))
431+ .order_by (
432+ # Prefer non-alt geometries, then by source preference
433+ gazetteer_wof_geojson .c .is_alt .asc (),
434+ gazetteer_wof_geojson .c .source .asc (),
435+ )
436+ )
437+ geojson_records = await database .fetch_all (geojson_query )
438+
439+ # Group by wok_id, keeping only the first (best) one
440+ for geojson_record in geojson_records :
441+ wok_id = geojson_record ["wok_id" ]
442+ if wok_id not in geojson_map :
443+ geojson_map [wok_id ] = dict (geojson_record )
444+
359445 # Convert results to JSON:API format
360446 data = []
361447 for row in results :
362448 row_dict = dict (row )
449+ wok_id = row_dict .get ("wok_id" )
450+
451+ # Get ancestors for this place
452+ ancestors = ancestors_map .get (wok_id , [])
453+
454+ # Build hierarchy: prefer region, county, locality for display
455+ hierarchy_parts = []
456+ hierarchy_placetypes = ["region" , "county" , "locality" ]
457+
458+ # Sort ancestors by placetype priority
459+ sorted_ancestors = sorted (
460+ ancestors ,
461+ key = lambda a : hierarchy_placetypes .index (a .get ("ancestor_placetype" , "" ))
462+ if a .get ("ancestor_placetype" ) in hierarchy_placetypes
463+ else 999 ,
464+ )
465+
466+ for ancestor in sorted_ancestors :
467+ if ancestor .get ("ancestor_placetype" ) in hierarchy_placetypes and ancestor .get (
468+ "name"
469+ ):
470+ hierarchy_parts .append (ancestor ["name" ])
471+
472+ # Build display name: "Name, Parent1, Parent2, Country"
473+ display_parts = [row_dict .get ("name" , "" )]
474+ display_parts .extend (hierarchy_parts )
475+ if row_dict .get ("country" ):
476+ display_parts .append (row_dict ["country" ])
477+ display_name = ", " .join (display_parts )
478+
479+ # Get GeoJSON for this place
480+ geojson_record = geojson_map .get (wok_id )
481+ geojson_data = None
482+ if geojson_record :
483+ try :
484+ geojson_data = json .loads (geojson_record ["body" ])
485+ except (json .JSONDecodeError , TypeError ):
486+ geojson_data = None
487+
363488 # Sanitize the data for JSON serialization
364489 row_dict = sanitize_for_json (row_dict )
365490
491+ # Sanitize ancestors/hierarchy data (may contain date fields)
492+ sanitized_ancestors = sanitize_for_json (ancestors )
493+
494+ # Add enhanced fields
495+ row_dict ["display_name" ] = display_name
496+ row_dict ["hierarchy" ] = sanitized_ancestors
497+ row_dict ["geojson" ] = geojson_data
498+
366499 # Format as JSON:API resource
367500 formatted_row = {
368501 "id" : str (row_dict .get ("wok_id" , row_dict .get ("id" , "" ))),
@@ -395,5 +528,6 @@ async def search_wof(
395528 return JSONResponse (content = reordered_response )
396529
397530 except Exception as e :
398- logger .error (f"Error searching WOF: { str (e )} " , exc_info = True )
399- raise HTTPException (status_code = 500 , detail = "Failed to search WOF" ) from e
531+ error_msg = str (e )
532+ logger .error (f"Error searching WOF: { error_msg } " , exc_info = True )
533+ raise HTTPException (status_code = 500 , detail = f"Failed to search WOF: { error_msg } " ) from e
0 commit comments