Skip to content

Commit 054178d

Browse files
phernandezclaude
andcommitted
fix: parameterize SQL queries in search repository type filters
Replace f-string interpolation with parameterized queries for note_types, search_item_types, and metadata filter paths to prevent SQL injection. Fixes #591 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 18f00f8 commit 054178d

4 files changed

Lines changed: 98 additions & 27 deletions

File tree

src/basic_memory/repository/postgres_search_repository.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,7 @@
1616
from basic_memory.repository.embedding_provider_factory import create_embedding_provider
1717
from basic_memory.repository.search_index_row import SearchIndexRow
1818
from basic_memory.repository.search_repository_base import SearchRepositoryBase
19-
from basic_memory.repository.metadata_filters import (
20-
parse_metadata_filters,
21-
build_postgres_json_path,
22-
)
19+
from basic_memory.repository.metadata_filters import parse_metadata_filters
2320
from basic_memory.repository.semantic_errors import SemanticDependenciesMissingError
2421
from basic_memory.schemas.search import SearchItemType, SearchRetrievalMode
2522

@@ -677,19 +674,23 @@ async def search(
677674
else:
678675
conditions.append("search_index.permalink = :permalink")
679676

680-
# Handle search item type filter
677+
# Handle search item type filter (parameterized for defense-in-depth)
681678
if search_item_types:
682-
type_list = ", ".join(f"'{t.value}'" for t in search_item_types)
683-
conditions.append(f"search_index.type IN ({type_list})")
684-
685-
# Handle note type filter using JSONB containment (frontmatter type field)
679+
type_placeholders = []
680+
for idx, t in enumerate(search_item_types):
681+
param_name = f"search_type_{idx}"
682+
params[param_name] = t.value
683+
type_placeholders.append(f":{param_name}")
684+
conditions.append(f"search_index.type IN ({', '.join(type_placeholders)})")
685+
686+
# Handle note type filter using JSONB containment (parameterized)
686687
if note_types:
687-
# Use JSONB @> operator for efficient containment queries
688688
type_conditions = []
689-
for note_type in note_types:
690-
# Create JSONB containment condition for each note type
689+
for idx, note_type in enumerate(note_types):
690+
param_name = f"note_type_{idx}"
691+
params[param_name] = json.dumps({"note_type": note_type})
691692
type_conditions.append(
692-
f'search_index.metadata @> \'{{"note_type": "{note_type}"}}\''
693+
f"search_index.metadata @> CAST(:{param_name} AS jsonb)"
693694
)
694695
conditions.append(f"({' OR '.join(type_conditions)})")
695696

@@ -701,15 +702,23 @@ async def search(
701702
order_by_clause = ", search_index.updated_at DESC"
702703

703704
# Handle structured metadata filters (frontmatter)
705+
# Uses jsonb_extract_path_text() / jsonb_extract_path() with parameterized
706+
# path parts instead of #>> / #> with interpolated paths.
704707
if metadata_filters:
705708
parsed_filters = parse_metadata_filters(metadata_filters)
706709
from_clause = "search_index JOIN entity ON search_index.entity_id = entity.id"
707710
metadata_expr = "entity.entity_metadata::jsonb"
708711

709712
for idx, filt in enumerate(parsed_filters):
710-
path = build_postgres_json_path(filt.path_parts)
711-
text_expr = f"({metadata_expr} #>> '{path}')"
712-
json_expr = f"({metadata_expr} #> '{path}')"
713+
# Parameterize each JSON path part individually
714+
path_param_names = []
715+
for j, part in enumerate(filt.path_parts):
716+
path_param = f"meta_path_{idx}_{j}"
717+
params[path_param] = part
718+
path_param_names.append(f":{path_param}")
719+
path_args = ", ".join(path_param_names)
720+
text_expr = f"jsonb_extract_path_text({metadata_expr}, {path_args})"
721+
json_expr = f"jsonb_extract_path({metadata_expr}, {path_args})"
713722

714723
if filt.op == "eq":
715724
value_param = f"meta_val_{idx}"
@@ -727,14 +736,12 @@ async def search(
727736
continue
728737

729738
if filt.op == "contains":
730-
import json as _json
731-
732739
base_param = f"meta_val_{idx}"
733740
tag_conditions = []
734741
# Require all values to be present
735742
for j, val in enumerate(filt.value):
736743
tag_param = f"{base_param}_{j}"
737-
params[tag_param] = _json.dumps([val])
744+
params[tag_param] = json.dumps([val])
738745
like_param = f"{base_param}_{j}_like"
739746
params[like_param] = f'%"{val}"%'
740747
like_param_single = f"{base_param}_{j}_like_single"
@@ -749,7 +756,7 @@ async def search(
749756

750757
if filt.op in {"gt", "gte", "lt", "lte", "between"}:
751758
compare_expr = (
752-
f"({metadata_expr} #>> '{path}')::double precision"
759+
f"{text_expr}::double precision"
753760
if filt.comparison == "numeric"
754761
else text_expr
755762
)

src/basic_memory/repository/sqlite_search_repository.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -666,16 +666,24 @@ async def search(
666666
params["permalink"] = permalink_text
667667
match_conditions.append("search_index.permalink MATCH :permalink")
668668

669-
# Handle entity type filter
669+
# Handle entity type filter (parameterized for defense-in-depth)
670670
if search_item_types:
671-
type_list = ", ".join(f"'{t.value}'" for t in search_item_types)
672-
conditions.append(f"search_index.type IN ({type_list})")
673-
674-
# Handle note type filter (frontmatter type field)
671+
type_placeholders = []
672+
for idx, t in enumerate(search_item_types):
673+
param_name = f"search_type_{idx}"
674+
params[param_name] = t.value
675+
type_placeholders.append(f":{param_name}")
676+
conditions.append(f"search_index.type IN ({', '.join(type_placeholders)})")
677+
678+
# Handle note type filter (frontmatter type field, parameterized)
675679
if note_types:
676-
type_list = ", ".join(f"'{t}'" for t in note_types)
680+
type_placeholders = []
681+
for idx, t in enumerate(note_types):
682+
param_name = f"note_type_{idx}"
683+
params[param_name] = t
684+
type_placeholders.append(f":{param_name}")
677685
conditions.append(
678-
f"json_extract(search_index.metadata, '$.note_type') IN ({type_list})"
686+
f"json_extract(search_index.metadata, '$.note_type') IN ({', '.join(type_placeholders)})"
679687
)
680688

681689
# Handle date filter using datetime() for proper comparison

tests/repository/test_postgres_search_repository.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,3 +572,28 @@ async def test_postgres_dimension_mismatch_triggers_table_recreation(session_mak
572572
row = result.fetchone()
573573
assert row is not None
574574
assert int(row[0]) == 8
575+
576+
577+
@pytest.mark.asyncio
578+
async def test_postgres_note_types_sql_injection_returns_empty(session_maker, test_project):
579+
"""Postgres JSONB containment with SQL injection payload must not alter query."""
580+
repo = PostgresSearchRepository(session_maker, project_id=test_project.id)
581+
582+
malicious_payloads = [
583+
'note"}}\' OR \'1\'=\'1',
584+
"note\"; DROP TABLE search_index;--",
585+
'note"}} UNION SELECT * FROM entity--',
586+
]
587+
for payload in malicious_payloads:
588+
results = await repo.search(note_types=[payload])
589+
assert results == [], f"Injection payload should not match: {payload}"
590+
591+
592+
@pytest.mark.asyncio
593+
async def test_postgres_metadata_filters_path_parameterized(session_maker, test_project):
594+
"""Metadata filter paths use jsonb_extract_path_text with parameterized parts."""
595+
repo = PostgresSearchRepository(session_maker, project_id=test_project.id)
596+
597+
# Nested path should work without SQL injection risk
598+
results = await repo.search(metadata_filters={"schema.confidence": {"$gt": 0.5}})
599+
assert isinstance(results, list)

tests/repository/test_search_repository.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,3 +941,34 @@ async def test_search_metadata_filters_numeric_comparisons(search_repository, se
941941
metadata_filters={"schema.confidence": {"$between": [0.3, 0.6]}}
942942
)
943943
assert {result.id for result in results} == {entity_low.id}
944+
945+
946+
# --- SQL injection safety tests ---
947+
# These tests verify that user-supplied filter values are parameterized and cannot
948+
# alter query structure. Each test passes a malicious payload and asserts the query
949+
# completes safely (returning empty results) rather than causing a SQL error or
950+
# data exfiltration.
951+
952+
953+
@pytest.mark.asyncio
954+
async def test_note_types_sql_injection_returns_empty(search_repository):
955+
"""note_types with SQL injection payload must not alter query structure."""
956+
malicious_payloads = [
957+
"note' OR '1'='1",
958+
"note'; DROP TABLE search_index;--",
959+
'note" OR 1=1--',
960+
"note') UNION SELECT * FROM entity--",
961+
]
962+
for payload in malicious_payloads:
963+
results = await search_repository.search(note_types=[payload])
964+
# Injection should be treated as a literal string value, not executed as SQL
965+
assert results == [], f"Injection payload should not match: {payload}"
966+
967+
968+
@pytest.mark.asyncio
969+
async def test_search_item_types_parameterized(search_repository):
970+
"""search_item_types enum values are parameterized, not interpolated."""
971+
# Normal enum usage still works
972+
results = await search_repository.search(search_item_types=[SearchItemType.ENTITY])
973+
# Should not raise — parameterized query handles enum values safely
974+
assert isinstance(results, list)

0 commit comments

Comments
 (0)