Skip to content

Commit 87c5b74

Browse files
author
alex-omophub
committed
Add bulk search functionality to the API
- Introduced `bulk_basic` and `bulk_semantic` methods for executing multiple lexical and semantic searches, respectively, in a single API call.
- Updated the README to include examples for bulk search usage.
- Added corresponding types for bulk search inputs and responses in the type definitions.
- Implemented integration and unit tests to validate the new bulk search features.
1 parent b2a98db commit 87c5b74

6 files changed

Lines changed: 494 additions & 2 deletions

File tree

README.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,28 @@ for result in client.search.semantic_iter("chronic kidney disease", page_size=50
8181
print(f"{result['concept_id']}: {result['concept_name']}")
8282
```
8383

84+
### Bulk Search
85+
86+
Search for multiple terms in a single API call — much faster than individual requests:
87+
88+
```python
89+
# Bulk lexical search (up to 50 queries)
90+
results = client.search.bulk_basic([
91+
{"search_id": "q1", "query": "diabetes mellitus"},
92+
{"search_id": "q2", "query": "hypertension"},
93+
{"search_id": "q3", "query": "aspirin"},
94+
], defaults={"vocabulary_ids": ["SNOMED"], "page_size": 5})
95+
96+
for item in results["results"]:
97+
print(f"{item['search_id']}: {len(item['results'])} results")
98+
99+
# Bulk semantic search (up to 25 queries)
100+
results = client.search.bulk_semantic([
101+
{"search_id": "s1", "query": "heart failure treatment options"},
102+
{"search_id": "s2", "query": "type 2 diabetes medication"},
103+
], defaults={"threshold": 0.5, "page_size": 10})
104+
```
105+
84106
### Similarity Search
85107

86108
Find concepts similar to a known concept or natural language query:
@@ -173,7 +195,7 @@ suggestions = client.concepts.suggest("diab", vocabulary_ids=["SNOMED"], page_si
173195
| Resource | Description | Key Methods |
174196
|----------|-------------|-------------|
175197
| `concepts` | Concept lookup and batch operations | `get()`, `get_by_code()`, `batch()`, `suggest()` |
176-
| `search` | Full-text and semantic search | `basic()`, `advanced()`, `semantic()`, `semantic_iter()`, `similar()`, `fuzzy()` |
198+
| `search` | Full-text and semantic search | `basic()`, `advanced()`, `semantic()`, `semantic_iter()`, `similar()`, `fuzzy()`, `bulk_basic()`, `bulk_semantic()` |
177199
| `hierarchy` | Navigate concept relationships | `ancestors()`, `descendants()` |
178200
| `mappings` | Cross-vocabulary mappings | `get()`, `map()` |
179201
| `vocabularies` | Vocabulary metadata | `list()`, `get()`, `stats()` |

src/omophub/resources/search.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
from ..types.common import PaginationMeta
1414
from ..types.concept import Concept
1515
from ..types.search import (
16+
BulkSearchDefaults,
17+
BulkSearchInput,
18+
BulkSearchResponse,
19+
BulkSemanticSearchDefaults,
20+
BulkSemanticSearchInput,
21+
BulkSemanticSearchResponse,
1622
SearchResult,
1723
SemanticSearchResult,
1824
SimilarSearchResult,
@@ -372,6 +378,77 @@ def fetch_page(
372378

373379
yield from paginate_sync(fetch_page, page_size)
374380

381+
def bulk_basic(
    self,
    searches: list[BulkSearchInput],
    *,
    defaults: BulkSearchDefaults | None = None,
) -> BulkSearchResponse:
    """Run a batch of lexical searches with one POST to ``/search/bulk``.

    The ``searches`` list is forwarded unchanged under the ``"searches"``
    key of the request body. Shared filters can be supplied once through
    ``defaults``; a value set on an individual search overrides the
    corresponding default.

    Args:
        searches: Search inputs, each carrying a unique ``search_id`` and
            a ``query``. At most 50 items per call.
        defaults: Filters applied to every search. Left out of the request
            body when ``None`` or empty.

    Returns:
        Bulk results with per-search status, results, and timing.

    Example::

        results = client.search.bulk_basic([
            {"search_id": "q1", "query": "diabetes"},
            {"search_id": "q2", "query": "hypertension"},
        ], defaults={"vocabulary_ids": ["SNOMED"], "page_size": 5})

        for item in results["results"]:
            print(item["search_id"], len(item["results"]))
    """
    # Falsy defaults (None or {}) are omitted, so the body then carries
    # only the "searches" key.
    payload: dict[str, Any] = (
        {"searches": searches, "defaults": defaults}
        if defaults
        else {"searches": searches}
    )
    return self._request.post("/search/bulk", json_data=payload)
415+
416+
def bulk_semantic(
    self,
    searches: list[BulkSemanticSearchInput],
    *,
    defaults: BulkSemanticSearchDefaults | None = None,
) -> BulkSemanticSearchResponse:
    """Run a batch of semantic searches with one POST to ``/search/semantic-bulk``.

    Natural-language queries are matched using neural embeddings. The
    ``searches`` list is forwarded unchanged under the ``"searches"`` key;
    each search may carry its own filters and threshold, which override
    the shared ``defaults``.

    Args:
        searches: Search inputs, each carrying a unique ``search_id`` and
            a ``query`` of 1-500 characters. At most 25 items per call.
        defaults: Filters applied to every search. Left out of the request
            body when ``None`` or empty.

    Returns:
        Bulk results with per-search status, similarity scores, and
        optional query enhancements.

    Example::

        results = client.search.bulk_semantic([
            {"search_id": "s1", "query": "heart failure treatment"},
            {"search_id": "s2", "query": "type 2 diabetes medication"},
        ], defaults={"threshold": 0.8, "page_size": 10})

        for item in results["results"]:
            print(item["search_id"], item.get("result_count", 0))
    """
    # Falsy defaults (None or {}) are omitted, so the body then carries
    # only the "searches" key.
    payload: dict[str, Any] = (
        {"searches": searches, "defaults": defaults}
        if defaults
        else {"searches": searches}
    )
    return self._request.post("/search/semantic-bulk", json_data=payload)
451+
375452
def similar(
376453
self,
377454
*,
@@ -630,6 +707,46 @@ async def semantic_iter(
630707

631708
page += 1
632709

710+
async def bulk_basic(
    self,
    searches: list[BulkSearchInput],
    *,
    defaults: BulkSearchDefaults | None = None,
) -> BulkSearchResponse:
    """Run a batch of lexical searches with one awaited POST to ``/search/bulk``.

    The ``searches`` list is forwarded unchanged under the ``"searches"``
    key of the request body.

    Args:
        searches: Search inputs (max 50).
        defaults: Filters shared by all searches. Left out of the request
            body when ``None`` or empty.

    Returns:
        Bulk results with per-search status and results.
    """
    # Falsy defaults (None or {}) are omitted, so the body then carries
    # only the "searches" key.
    payload: dict[str, Any] = (
        {"searches": searches, "defaults": defaults}
        if defaults
        else {"searches": searches}
    )
    return await self._request.post("/search/bulk", json_data=payload)
729+
730+
async def bulk_semantic(
    self,
    searches: list[BulkSemanticSearchInput],
    *,
    defaults: BulkSemanticSearchDefaults | None = None,
) -> BulkSemanticSearchResponse:
    """Run a batch of semantic searches with one awaited POST to ``/search/semantic-bulk``.

    The ``searches`` list is forwarded unchanged under the ``"searches"``
    key of the request body.

    Args:
        searches: Search inputs (max 25).
        defaults: Filters shared by all searches. Left out of the request
            body when ``None`` or empty.

    Returns:
        Bulk results with per-search status and similarity scores.
    """
    # Falsy defaults (None or {}) are omitted, so the body then carries
    # only the "searches" key.
    payload: dict[str, Any] = (
        {"searches": searches, "defaults": defaults}
        if defaults
        else {"searches": searches}
    )
    return await self._request.post("/search/semantic-bulk", json_data=payload)
749+
633750
async def similar(
634751
self,
635752
*,

src/omophub/types/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@
3434
RelationshipType,
3535
)
3636
from .search import (
37+
BulkSearchDefaults,
38+
BulkSearchInput,
39+
BulkSearchResponse,
40+
BulkSearchResultItem,
41+
BulkSemanticSearchDefaults,
42+
BulkSemanticSearchInput,
43+
BulkSemanticSearchResponse,
44+
BulkSemanticSearchResultItem,
45+
QueryEnhancement,
3746
SearchFacet,
3847
SearchFacets,
3948
SearchMetadata,
@@ -84,6 +93,16 @@
8493
"RelationshipSummary",
8594
"RelationshipType",
8695
"ResponseMeta",
96+
# Bulk Search
97+
"BulkSearchDefaults",
98+
"BulkSearchInput",
99+
"BulkSearchResponse",
100+
"BulkSearchResultItem",
101+
"BulkSemanticSearchDefaults",
102+
"BulkSemanticSearchInput",
103+
"BulkSemanticSearchResponse",
104+
"BulkSemanticSearchResultItem",
105+
"QueryEnhancement",
87106
# Search
88107
"SearchFacet",
89108
"SearchFacets",

src/omophub/types/search.py

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from typing import TYPE_CHECKING, Any, Literal, TypedDict
66

7-
from typing_extensions import NotRequired
7+
from typing_extensions import NotRequired, Required
88

99
if TYPE_CHECKING:
1010
from .concept import Concept
@@ -77,6 +77,112 @@ class SimilarSearchResult(TypedDict):
7777
search_metadata: SimilarSearchMetadata
7878

7979

80+
# ---------------------------------------------------------------------------
81+
# Bulk search types
82+
# ---------------------------------------------------------------------------
83+
84+
85+
class BulkSearchInput(TypedDict, total=False):
86+
"""Input for a single query in a bulk lexical search."""
87+
88+
search_id: Required[str]
89+
query: Required[str]
90+
vocabulary_ids: list[str]
91+
domain_ids: list[str]
92+
concept_class_ids: list[str]
93+
standard_concept: str
94+
include_invalid: bool
95+
page_size: int
96+
97+
98+
class BulkSearchDefaults(TypedDict, total=False):
    """Shared filters for every search in a bulk lexical request.

    All keys are optional (``total=False``). The key set mirrors the
    optional filter keys of ``BulkSearchInput``.
    """

    vocabulary_ids: list[str]
    domain_ids: list[str]
    concept_class_ids: list[str]
    standard_concept: str
    include_invalid: bool
    page_size: int
107+
108+
109+
class BulkSearchResultItem(TypedDict):
110+
"""Result for a single query in a bulk lexical search."""
111+
112+
search_id: str
113+
query: str
114+
results: list[dict[str, Any]]
115+
status: str # "completed" | "failed"
116+
error: NotRequired[str]
117+
duration: NotRequired[int]
118+
119+
120+
class BulkSearchResponse(TypedDict):
    """Envelope returned by a bulk lexical search.

    All four keys are required: the per-search result items plus three
    integer counters.
    """

    results: list[BulkSearchResultItem]
    total_searches: int
    # NOTE(review): BulkSemanticSearchResponse names its counters
    # ``completed_count`` / ``failed_count`` -- confirm that both spellings
    # match the respective API payloads.
    completed_searches: int
    failed_searches: int
127+
128+
129+
class BulkSemanticSearchInput(TypedDict, total=False):
130+
"""Input for a single query in a bulk semantic search."""
131+
132+
search_id: Required[str]
133+
query: Required[str] # 1-500 characters
134+
page_size: int
135+
threshold: float
136+
vocabulary_ids: list[str]
137+
domain_ids: list[str]
138+
standard_concept: str
139+
concept_class_id: str
140+
141+
142+
class BulkSemanticSearchDefaults(TypedDict, total=False):
    """Shared settings for every search in a bulk semantic request.

    All keys are optional (``total=False``). The key set mirrors the
    optional keys of ``BulkSemanticSearchInput``.
    """

    page_size: int
    threshold: float
    vocabulary_ids: list[str]
    domain_ids: list[str]
    standard_concept: str
    concept_class_id: str
151+
152+
153+
class QueryEnhancement(TypedDict, total=False):
    """Query enhancement details attached to a semantic search result.

    All keys are optional (``total=False``).
    """

    # The query text before and after enhancement.
    original_query: str
    enhanced_query: str

    # What was rewritten during enhancement.
    abbreviations_expanded: list[str]
    misspellings_corrected: list[str]
160+
161+
162+
class BulkSemanticSearchResultItem(TypedDict):
163+
"""Result for a single query in a bulk semantic search."""
164+
165+
search_id: str
166+
query: str
167+
results: list[dict[str, Any]]
168+
status: str # "completed" | "failed"
169+
error: NotRequired[str]
170+
similarity_threshold: NotRequired[float]
171+
result_count: NotRequired[int]
172+
duration: NotRequired[int]
173+
query_enhancement: NotRequired[QueryEnhancement]
174+
175+
176+
class BulkSemanticSearchResponse(TypedDict):
177+
"""Response from bulk semantic search."""
178+
179+
results: list[BulkSemanticSearchResultItem]
180+
total_searches: int
181+
completed_count: int
182+
failed_count: int
183+
total_duration: NotRequired[int]
184+
185+
80186
class SearchFacet(TypedDict):
81187
"""Search facet with count."""
82188

0 commit comments

Comments
 (0)