|
1 | 1 | #!/usr/bin/env python3 |
2 | | -"""Examples of searching for concepts using the OMOPHub SDK.""" |
| 2 | +"""Examples of searching for concepts using the OMOPHub SDK. |
| 3 | +
|
| 4 | +Demonstrates: basic search, filtered search, autocomplete, pagination, |
| 5 | +semantic search, similarity search, bulk lexical search, and bulk semantic search. |
| 6 | +""" |
3 | 7 |
|
4 | 8 | import omophub |
5 | 9 |
|
@@ -38,17 +42,61 @@ def filtered_search() -> None: |
38 | 42 | print(f" [{c['vocabulary_id']}] {c['concept_name']}") |
39 | 43 |
|
40 | 44 |
|
41 | | -def fuzzy_search() -> None: |
42 | | - """Demonstrate typo-tolerant fuzzy search.""" |
43 | | - print("\n=== Fuzzy Search ===") |
def bulk_lexical_search() -> None:
    """Run two bulk lexical searches and print a summary of each.

    The first call sends three plain queries that share one set of
    defaults; the second gives each query its own ``domain_ids`` while
    still sharing defaults. Uses the module-level ``client``.
    """
    print("\n=== Bulk Lexical Search ===")

    # Several terms submitted in a single request (original note: up to 50).
    shared_queries = [
        {"search_id": "q1", "query": "diabetes mellitus"},
        {"search_id": "q2", "query": "hypertension"},
        {"search_id": "q3", "query": "aspirin"},
    ]
    shared_defaults = {"vocabulary_ids": ["SNOMED"], "page_size": 5}
    first_batch = client.search.bulk_basic(shared_queries, defaults=shared_defaults)

    # One summary line per submitted query: id, hit count, status.
    for entry in first_batch["results"]:
        label = entry["search_id"]
        hit_count = len(entry["results"])
        state = entry["status"]
        print(f" {label}: {hit_count} results ({state})")

    # Same endpoint, but each query carries its own domain filter.
    domain_queries = [
        {"search_id": "conditions", "query": "diabetes", "domain_ids": ["Condition"]},
        {"search_id": "drugs", "query": "metformin", "domain_ids": ["Drug"]},
    ]
    domain_defaults = {"vocabulary_ids": ["SNOMED", "RxNorm"], "page_size": 3}
    second_batch = client.search.bulk_basic(domain_queries, defaults=domain_defaults)

    print("\n Per-query domain overrides:")
    for entry in second_batch["results"]:
        print(f" {entry['search_id']}:")
        for concept in entry["results"]:
            name = concept["concept_name"]
            vocabulary = concept["vocabulary_id"]
            domain = concept["domain_id"]
            print(f" {name} ({vocabulary}/{domain})")
| 76 | + |
| 77 | + |
def bulk_semantic_search() -> None:
    """Run one bulk semantic search and print a summary per query.

    Three natural-language queries are sent together with a shared
    ``threshold`` and ``page_size``. For each returned item the result
    count and status are printed, followed by the first hit and its
    similarity score when the item has any results. Uses the module-level
    ``client``.
    """
    print("\n=== Bulk Semantic Search ===")

    # Several natural-language queries in one request (original note: up to 25).
    nl_queries = [
        {"search_id": "s1", "query": "heart failure treatment options"},
        {"search_id": "s2", "query": "type 2 diabetes medication"},
        {"search_id": "s3", "query": "elevated blood pressure"},
    ]
    shared_defaults = {"threshold": 0.5, "page_size": 5}
    batch = client.search.bulk_semantic(nl_queries, defaults=shared_defaults)

    for entry in batch["results"]:
        # Prefer the reported count; otherwise count the returned hits.
        fallback_count = len(entry["results"])
        shown_count = entry.get("result_count", fallback_count)
        print(f" {entry['search_id']}: {shown_count} results ({entry['status']})")

        hits = entry["results"]
        if not hits:
            continue

        # Only the first hit of each query is shown.
        best = hits[0]
        best_name = best["concept_name"]
        best_score = best["similarity_score"]
        print(f" Top: {best_name} (score: {best_score:.2f})")
52 | 100 |
|
53 | 101 |
|
54 | 102 | def autocomplete_example() -> None: |
@@ -139,9 +187,10 @@ def similarity_search() -> None: |
if __name__ == "__main__":
    # Run every example in order when the file is executed as a script.
    demos = (
        basic_search,
        filtered_search,
        autocomplete_example,
        pagination_example,
        semantic_search,
        semantic_pagination,
        similarity_search,
        bulk_lexical_search,
        bulk_semantic_search,
    )
    for run_demo in demos:
        run_demo()
0 commit comments