Skip to content

Commit a21e87e

Browse files
author
alex-omophub
committed
Add semantic search methods and corresponding tests
- Introduced `semantic()` and `semantic_all()` methods for performing semantic concept searches using neural embeddings.
- Added `similar()` method to find concepts similar to a given reference.
- Enhanced documentation for new methods, including parameters and return values.
- Implemented integration tests to validate functionality and ensure correct behavior of new methods, including filtering by vocabulary and domain.
- Updated existing tests to accommodate new features and ensure robustness.
1 parent e682c2f commit a21e87e

4 files changed

Lines changed: 1030 additions & 1 deletion

File tree

R/search.R

Lines changed: 185 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,11 +222,195 @@ SearchResource <- R6::R6Class(
222222
perform_get(private$.base_req, "search/suggest", query = params)
223223
},
224224

225+
#' @description
#' Search for concepts by meaning, using neural embeddings.
#'
#' Optional filters are only added to the request when they are supplied;
#' `NULL` arguments are left out of the query string entirely.
#'
#' @param query Natural language search query (required, non-empty string).
#' @param vocabulary_ids Filter by vocabulary IDs.
#' @param domain_ids Filter by domain IDs.
#' @param standard_concept Filter by standard concept ('S' or 'C').
#' @param concept_class_id Filter by concept class ID.
#' @param threshold Minimum similarity threshold (0.0-1.0, default 0.3).
#'   When `NULL` the parameter is not sent.
#' @param page Page number (1-based). Default 1.
#' @param page_size Results per page (max 100). Default 20.
#'
#' @returns List with results and pagination metadata.
semantic = function(query,
                    vocabulary_ids = NULL,
                    domain_ids = NULL,
                    standard_concept = NULL,
                    concept_class_id = NULL,
                    threshold = NULL,
                    page = 1,
                    page_size = 20) {
  checkmate::assert_string(query, min.chars = 1)
  paging <- validate_pagination(page, page_size, max_page_size = 100)

  # Mandatory parameters first; each optional filter is appended below
  # only when the caller actually provided it.
  query_params <- list(
    query = query,
    page = paging$page,
    page_size = paging$page_size
  )

  if (!is.null(vocabulary_ids)) {
    query_params[["vocabulary_ids"]] <- join_params(vocabulary_ids)
  }

  if (!is.null(domain_ids)) {
    query_params[["domain_ids"]] <- join_params(domain_ids)
  }

  if (!is.null(standard_concept)) {
    checkmate::assert_choice(standard_concept, c("S", "C"))
    query_params[["standard_concept"]] <- standard_concept
  }

  if (!is.null(concept_class_id)) {
    query_params[["concept_class_id"]] <- concept_class_id
  }

  if (!is.null(threshold)) {
    checkmate::assert_number(threshold, lower = 0, upper = 1)
    query_params[["threshold"]] <- threshold
  }

  perform_get(
    private$.base_req,
    "concepts/semantic-search",
    query = query_params
  )
},
275+
276+
#' @description
#' Fetch all semantic search results with automatic pagination.
#'
#' Calls `semantic()` once per page, normalises each response to a
#' `list(data = , meta = )` pair and hands the page fetcher to
#' `paginate_all()`.
#'
#' @param query Natural language search query (required).
#' @param vocabulary_ids Filter by vocabulary IDs.
#' @param domain_ids Filter by domain IDs.
#' @param standard_concept Filter by standard concept ('S' or 'C').
#' @param threshold Minimum similarity threshold (0.0-1.0).
#' @param page_size Results per page. Default 100.
#' @param max_pages Maximum pages to fetch. Default Inf.
#' @param progress Show progress bar. Default `TRUE`.
#' @param concept_class_id Filter by concept class ID. Forwarded unchanged
#'   to `semantic()`. Placed last so existing positional calls keep working.
#'
#' @returns A tibble of all matching concepts with similarity scores.
semantic_all = function(query,
                        vocabulary_ids = NULL,
                        domain_ids = NULL,
                        standard_concept = NULL,
                        threshold = NULL,
                        page_size = 100,
                        max_pages = Inf,
                        progress = TRUE,
                        concept_class_id = NULL) {
  # Fetches a single page and reshapes it for the paginator.
  fetch_fn <- function(page, size) {
    result <- self$semantic(
      query = query,
      vocabulary_ids = vocabulary_ids,
      domain_ids = domain_ids,
      standard_concept = standard_concept,
      concept_class_id = concept_class_id,
      threshold = threshold,
      page = page,
      page_size = size
    )

    # The response may arrive in one of three shapes:
    #   1. list(data = ..., meta = ...), where data may itself wrap `results`
    #   2. list(results = ..., [meta = ...])
    #   3. anything else, treated as the data itself with empty metadata
    if (is.list(result) &&
      "data" %in% names(result) &&
      "meta" %in% names(result)) {
      data <- result$data
      if (is.list(data) && "results" %in% names(data)) {
        data <- data$results
      }
      list(data = data, meta = result$meta)
    } else if (is.list(result) && "results" %in% names(result)) {
      list(data = result$results, meta = result$meta %||% list())
    } else {
      list(data = result %||% list(), meta = list())
    }
  }

  paginate_all(
    fetch_fn,
    page_size = page_size,
    max_pages = max_pages,
    progress = progress
  )
},
323+
324+
#' @description
#' Find concepts similar to a reference concept or query.
#'
#' Must provide exactly one of: concept_id, concept_name, or query.
#' All arguments are validated before the request body is built, so an
#' invalid call fails with a descriptive error instead of sending a
#' malformed request.
#'
#' @param concept_id Concept ID to find similar concepts for. Must be a
#'   single value that can be coerced to an integer.
#' @param concept_name Concept name to find similar concepts for
#'   (non-empty string).
#' @param query Natural language query for semantic similarity
#'   (non-empty string).
#' @param algorithm One of 'semantic', 'lexical', or 'hybrid' (default).
#' @param similarity_threshold Minimum similarity (0.0-1.0). Default 0.7.
#' @param page_size Max results (max 1000). Default 20. Must be a single
#'   whole number; it is only sent when it differs from 20.
#' @param vocabulary_ids Filter by vocabulary IDs.
#' @param domain_ids Filter by domain IDs.
#' @param standard_concept Filter by standard concept flag ('S', 'C', or 'N').
#' @param include_invalid Include invalid/deprecated concepts
#'   (`TRUE`/`FALSE`).
#' @param include_scores Include detailed similarity scores
#'   (`TRUE`/`FALSE`).
#' @param include_explanations Include similarity explanations
#'   (`TRUE`/`FALSE`).
#'
#' @returns List with similar_concepts and search_metadata.
#'
#' @note When algorithm='semantic', only single vocabulary/domain filter supported.
similar = function(concept_id = NULL,
                   concept_name = NULL,
                   query = NULL,
                   algorithm = "hybrid",
                   similarity_threshold = 0.7,
                   page_size = 20,
                   vocabulary_ids = NULL,
                   domain_ids = NULL,
                   standard_concept = NULL,
                   include_invalid = NULL,
                   include_scores = NULL,
                   include_explanations = NULL) {
  # Validate exactly one of concept_id, concept_name, or query provided
  provided <- sum(
    !is.null(concept_id),
    !is.null(concept_name),
    !is.null(query)
  )
  if (provided != 1) {
    cli::cli_abort(
      "Exactly one of {.arg concept_id}, {.arg concept_name}, or {.arg query} must be provided"
    )
  }

  checkmate::assert_choice(algorithm, c("semantic", "lexical", "hybrid"))
  checkmate::assert_number(similarity_threshold, lower = 0, upper = 1)
  # assert_int() (unlike assert_integerish()) rejects NA and vectors of
  # length > 1, both of which would break the scalar `if` comparison below.
  checkmate::assert_int(page_size, lower = 1, upper = 1000)
  checkmate::assert_string(concept_name, min.chars = 1, null.ok = TRUE)
  checkmate::assert_string(query, min.chars = 1, null.ok = TRUE)
  checkmate::assert_choice(
    standard_concept,
    c("S", "C", "N"),
    null.ok = TRUE
  )
  checkmate::assert_flag(include_invalid, null.ok = TRUE)
  checkmate::assert_flag(include_scores, null.ok = TRUE)
  checkmate::assert_flag(include_explanations, null.ok = TRUE)

  body <- list(
    algorithm = algorithm,
    similarity_threshold = similarity_threshold
  )

  if (!is.null(concept_id)) {
    # Coerce first so numeric strings keep working, then refuse anything
    # that did not become exactly one non-missing integer.
    id <- suppressWarnings(as.integer(concept_id))
    if (length(id) != 1L || is.na(id)) {
      cli::cli_abort(
        "{.arg concept_id} must be a single integer-like value"
      )
    }
    body$concept_id <- id
  }
  if (!is.null(concept_name)) {
    body$concept_name <- concept_name
  }
  if (!is.null(query)) {
    body$query <- query
  }
  # The default page size is not sent explicitly.
  if (page_size != 20) {
    body$page_size <- as.integer(page_size)
  }
  if (!is.null(vocabulary_ids)) {
    body$vocabulary_ids <- as.list(vocabulary_ids)
  }
  if (!is.null(domain_ids)) {
    body$domain_ids <- as.list(domain_ids)
  }
  if (!is.null(standard_concept)) {
    body$standard_concept <- standard_concept
  }
  if (!is.null(include_invalid)) {
    body$include_invalid <- include_invalid
  }
  if (!is.null(include_scores)) {
    body$include_scores <- include_scores
  }
  if (!is.null(include_explanations)) {
    body$include_explanations <- include_explanations
  }

  perform_post(private$.base_req, "search/similar", body = body)
},
408+
225409
#' @description
#' Print resource information.
#'
#' Writes a short header and the list of available methods to the console.
#'
#' @returns The resource itself, invisibly.
print = function() {
  cat("<OMOPHub SearchResource>\n")
  # Only the post-commit method list is kept; the superseded line from the
  # diff would otherwise print the "Methods" line twice.
  cat(" Methods: basic, basic_all, advanced, autocomplete, semantic, semantic_all, similar\n")
  invisible(self)
}
232416
),

man/SearchResource.Rd

Lines changed: 146 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)