Skip to content

Commit 52650b2

Browse files
committed
fix(search): support alias-like symbol queries
1 parent a2013d2 commit 52650b2

1 file changed

Lines changed: 162 additions & 3 deletions

File tree

grapha/src/search.rs

Lines changed: 162 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ struct SearchFields {
6969
locator: tantivy::schema::Field,
7070
name: tantivy::schema::Field,
7171
name_lower: tantivy::schema::Field,
72+
search_terms: Option<tantivy::schema::Field>,
7273
kind: tantivy::schema::Field,
7374
file: tantivy::schema::Field,
7475
module: tantivy::schema::Field,
@@ -84,6 +85,7 @@ fn schema() -> (Schema, SearchFields) {
8485
let name = schema_builder.add_text_field("name", TEXT | STORED);
8586
// Lowercased, non-tokenized name for fuzzy regex matching on CamelCase symbols
8687
let name_lower = schema_builder.add_text_field("name_lower", STRING);
88+
let search_terms = schema_builder.add_text_field("search_terms", TEXT);
8789
let kind = schema_builder.add_text_field("kind", STRING | STORED);
8890
let file = schema_builder.add_text_field("file", TEXT | STORED);
8991
let module = schema_builder.add_text_field("module", STRING | STORED);
@@ -98,6 +100,7 @@ fn schema() -> (Schema, SearchFields) {
98100
locator,
99101
name,
100102
name_lower,
103+
search_terms: Some(search_terms),
101104
kind,
102105
file,
103106
module,
@@ -127,7 +130,7 @@ fn node_document(fields: SearchFields, node: &Node, locator: &str) -> Result<Tan
127130
let visibility_str = serde_json::to_string(&node.visibility)?
128131
.trim_matches('"')
129132
.to_string();
130-
Ok(doc!(
133+
let mut document = doc!(
131134
fields.id => node.id.clone(),
132135
fields.locator => locator.to_string(),
133136
fields.name => node.name.clone(),
@@ -138,7 +141,14 @@ fn node_document(fields: SearchFields, node: &Node, locator: &str) -> Result<Tan
138141
fields.module_lower => node.module.as_deref().unwrap_or("").to_lowercase(),
139142
fields.visibility => visibility_str,
140143
fields.role => role_to_string(&node.role),
141-
))
144+
);
145+
if let Some(search_terms_field) = fields.search_terms {
146+
document.add_text(
147+
search_terms_field,
148+
search_terms_text(&node.name, locator, &node.file.to_string_lossy()),
149+
);
150+
}
151+
Ok(document)
142152
}
143153

144154
fn rebuild_index_impl(graph: &Graph, index_path: &Path) -> Result<Index> {
@@ -242,6 +252,7 @@ fn resolve_fields(index: &Index) -> Result<SearchFields> {
242252
locator: schema.get_field("locator")?,
243253
name: schema.get_field("name")?,
244254
name_lower: schema.get_field("name_lower")?,
255+
search_terms: schema.get_field("search_terms").ok(),
245256
kind: schema.get_field("kind")?,
246257
file: schema.get_field("file")?,
247258
module: schema.get_field("module")?,
@@ -289,6 +300,82 @@ fn tokenize_locator_query(query: &str) -> Vec<String> {
289300
.collect()
290301
}
291302

303+
/// Split an identifier-like string into lowercase word tokens.
///
/// A token boundary is any non-ASCII-alphanumeric character, a
/// lower→upper case transition (camelCase), or a letter↔digit
/// transition. The returned tokens are sorted and deduplicated.
fn tokenize_search_terms(input: &str) -> Vec<String> {
    let mut words: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut last: Option<char> = None;

    // Push the in-progress token (if any) onto the output list.
    let flush = |buf: &mut String, out: &mut Vec<String>| {
        if !buf.is_empty() {
            out.push(std::mem::take(buf));
        }
    };

    for ch in input.chars() {
        if !ch.is_ascii_alphanumeric() {
            // Separator: end the current token and forget the boundary state.
            flush(&mut pending, &mut words);
            last = None;
            continue;
        }

        let boundary = match last {
            Some(prev) => {
                (prev.is_ascii_lowercase() && ch.is_ascii_uppercase())
                    || (prev.is_ascii_alphabetic() && ch.is_ascii_digit())
                    || (prev.is_ascii_digit() && ch.is_ascii_alphabetic())
            }
            None => false,
        };
        if boundary {
            flush(&mut pending, &mut words);
        }

        pending.push(ch.to_ascii_lowercase());
        last = Some(ch);
    }
    flush(&mut pending, &mut words);

    // BTreeSet yields a sorted, unique sequence — equivalent to sort()+dedup().
    let unique: std::collections::BTreeSet<String> = words.into_iter().collect();
    unique.into_iter().collect()
}

/// Build the whitespace-joined token set indexed for a node: the union
/// of tokens from its name, locator, and file path, sorted and
/// deduplicated so word order in the query does not matter.
fn search_terms_text(name: &str, locator: &str, file: &str) -> String {
    let combined: std::collections::BTreeSet<String> = [name, locator, file]
        .iter()
        .flat_map(|source| tokenize_search_terms(source))
        .collect();
    combined.into_iter().collect::<Vec<_>>().join(" ")
}
347+
348+
/// Whether `query` looks like a plain identifier or path: non-empty and
/// composed solely of ASCII alphanumerics plus `_`, `-`, `.`, and `/`.
/// Queries with spaces or operators keep using the full query parser.
fn identifier_like_query(query: &str) -> bool {
    if query.is_empty() {
        return false;
    }
    // Byte-wise scan: any non-ASCII char fails both checks, matching the
    // per-char semantics exactly.
    query
        .bytes()
        .all(|b| b.is_ascii_alphanumeric() || b"_-./".contains(&b))
}
354+
355+
fn search_terms_query(
356+
fields: SearchFields,
357+
query_str: &str,
358+
) -> Option<Box<dyn tantivy::query::Query>> {
359+
let search_terms_field = fields.search_terms?;
360+
let terms = tokenize_search_terms(query_str);
361+
if terms.len() < 2 {
362+
return None;
363+
}
364+
365+
let clauses: Vec<(Occur, Box<dyn tantivy::query::Query>)> = terms
366+
.into_iter()
367+
.map(|term| {
368+
let term = Term::from_field_text(search_terms_field, &term);
369+
(
370+
Occur::Must,
371+
Box::new(TermQuery::new(term, IndexRecordOption::Basic))
372+
as Box<dyn tantivy::query::Query>,
373+
)
374+
})
375+
.collect();
376+
Some(Box::new(BooleanQuery::new(clauses)))
377+
}
378+
292379
fn requires_full_rebuild_for_locators(previous: &Graph, delta: &GraphDelta<'_>) -> bool {
293380
if delta
294381
.added_edges
@@ -380,7 +467,20 @@ pub fn search_filtered(
380467
} else {
381468
let query_parser =
382469
QueryParser::for_index(index, vec![fields.name, fields.locator, fields.file]);
383-
Box::new(query_parser.parse_query(query_str)?)
470+
let exact_query =
471+
Box::new(query_parser.parse_query(query_str)?) as Box<dyn tantivy::query::Query>;
472+
if identifier_like_query(query_str) {
473+
if let Some(token_query) = search_terms_query(fields, query_str) {
474+
Box::new(BooleanQuery::new(vec![
475+
(Occur::Should, exact_query),
476+
(Occur::Should, token_query),
477+
]))
478+
} else {
479+
exact_query
480+
}
481+
} else {
482+
exact_query
483+
}
384484
};
385485

386486
let mut clauses: Vec<(Occur, Box<dyn tantivy::query::Query>)> = vec![(Occur::Must, text_query)];
@@ -937,6 +1037,65 @@ mod tests {
9371037
);
9381038
}
9391039

1040+
#[test]
fn identifier_search_matches_token_equivalent_wrapper_name() {
    // Two generated L10n wrappers are indexed; the query below permutes the
    // word order of the first one's name, so only token-based matching
    // (not exact / prefix matching) can surface it.
    let temp_dir = tempfile::tempdir().unwrap();
    let graph = Graph {
        version: "0.1.0".to_string(),
        nodes: vec![
            Node {
                name: "commonuiListSearchEmpty".into(),
                id: "AppUI/Sources/AppResource/Generated/Strings.generated.swift::L10n::commonuiListSearchEmpty".into(),
                file: "AppUI/Sources/AppResource/Generated/Strings.generated.swift".into(),
                kind: NodeKind::Property,
                span: Span { start: [0, 0], end: [1, 0] },
                visibility: Visibility::Public,
                module: Some("AppUI".into()),
                metadata: HashMap::new(),
                role: None,
                signature: None,
                doc_comment: None,
                snippet: None,
            },
            // Decoy sibling; must not be required for the assertion to pass.
            Node {
                name: "roomShareNoFriedns".into(),
                id: "AppUI/Sources/AppResource/Generated/Strings.generated.swift::L10n::roomShareNoFriedns".into(),
                file: "AppUI/Sources/AppResource/Generated/Strings.generated.swift".into(),
                kind: NodeKind::Property,
                span: Span { start: [2, 0], end: [3, 0] },
                visibility: Visibility::Public,
                module: Some("AppUI".into()),
                metadata: HashMap::new(),
                role: None,
                signature: None,
                doc_comment: None,
                snippet: None,
            },
        ],
        edges: vec![],
    };
    let search_index = build_index(&graph, temp_dir.path()).unwrap();

    // "commonuiSearchListEmpty" has the same word set as the indexed name,
    // just in a different order.
    let hits = search(&search_index, "commonuiSearchListEmpty", 10).unwrap();

    let hit_names: Vec<_> = hits.iter().map(|hit| &hit.name).collect();
    assert!(
        hit_names
            .iter()
            .any(|name| name.as_str() == "commonuiListSearchEmpty"),
        "tokenized identifier search should find the real generated wrapper, got: {:?}",
        hit_names
    );
}
1098+
9401099
#[test]
9411100
fn filter_by_kind() {
9421101
let dir = tempfile::tempdir().unwrap();

0 commit comments

Comments
 (0)