Skip to content

Commit 41d34b2

Browse files
fix: prevent path traversal in knowledge base create endpoint (#12337)
* fix: prevent path traversal in knowledge base create endpoint Add _validate_kb_path_containment() helper that applies Path.resolve() and is_relative_to() before any directory creation in create_knowledge_base. Replaces duplicated containment-check logic in _resolve_kb_path() with a single shared helper to prevent security-critical divergence. * test: add path traversal tests for knowledge base create endpoint Add four tests covering the create endpoint attack surface: - single-level traversal (../victim_user/evil_kb) - absolute path injection (/tmp/evil) - prefix-ambiguity bypass (../activeuser_evil/secret_kb) - logger.warning observability on traversal attempt * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent 937de01 commit 41d34b2

2 files changed

Lines changed: 148 additions & 13 deletions

File tree

src/backend/base/langflow/api/v1/knowledge_bases.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,24 @@
3535
router = APIRouter(tags=["Knowledge Bases"], prefix="/knowledge_bases", include_in_schema=False)
3636

3737

38+
def _validate_kb_path_containment(kb_user_path: Path, kb_path: Path, kb_name: str, username: str) -> None:
39+
"""Raise 403 if kb_path is not contained within kb_user_path.
40+
41+
Uses is_relative_to() instead of startswith() to prevent path traversal attacks.
42+
startswith() has a prefix-ambiguity bug: a user named "alice" would incorrectly allow
43+
paths under "alice_evil/" because the string starts with "alice". is_relative_to() performs
44+
proper path containment checking.
45+
"""
46+
if not kb_path.is_relative_to(kb_user_path):
47+
logger.warning(
48+
"Path traversal attempt blocked: user=%s kb_name=%r resolved_path=%s",
49+
username,
50+
kb_name,
51+
kb_path,
52+
)
53+
raise HTTPException(status_code=403, detail=f"Access denied for knowledge base '{kb_name}'.")
54+
55+
3856
def _resolve_kb_path(kb_name: str, current_user: CurrentActiveUser) -> Path:
3957
"""Resolve and validate KB path.
4058
@@ -49,18 +67,7 @@ def _resolve_kb_path(kb_name: str, current_user: CurrentActiveUser) -> Path:
4967
kb_user_path = (kb_root_path / kb_user).resolve()
5068
kb_path = (kb_user_path / kb_name).resolve()
5169

52-
# Security: use is_relative_to() instead of startswith() to prevent path traversal attacks.
53-
# startswith() has a prefix-ambiguity bug: a user named "alice" would incorrectly allow
54-
# paths under "alice_evil/" because the string starts with "alice". is_relative_to() performs
55-
# proper path containment checking.
56-
if not kb_path.is_relative_to(kb_user_path):
57-
logger.warning(
58-
"Path traversal attempt blocked: user=%s kb_name=%r resolved_path=%s",
59-
kb_user,
60-
kb_name,
61-
kb_path,
62-
)
63-
raise HTTPException(status_code=403, detail=f"Access denied for knowledge base '{kb_name}'.")
70+
_validate_kb_path_containment(kb_user_path, kb_path, kb_name, kb_user)
6471

6572
if not kb_path.exists() or not kb_path.is_dir():
6673
raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
@@ -78,11 +85,17 @@ async def create_knowledge_base(
7885
kb_root_path = KBStorageHelper.get_root_path()
7986
kb_user = current_user.username
8087
kb_name = request.name.strip().replace(" ", "_")
81-
kb_path = kb_root_path / kb_user / kb_name
8288
# Validate KB name
8389
if not kb_name or len(kb_name) < MIN_KB_NAME_LENGTH:
8490
raise HTTPException(status_code=400, detail="Knowledge base name must be at least 3 characters")
8591

92+
# Security: resolve paths and validate containment to prevent path traversal attacks.
93+
# A crafted kb_name like "../victim/evil" or an absolute path like "/tmp/evil" must be
94+
# rejected before any directory is created.
95+
kb_user_path = (kb_root_path / kb_user).resolve()
96+
kb_path = (kb_user_path / kb_name).resolve()
97+
_validate_kb_path_containment(kb_user_path, kb_path, kb_name, kb_user)
98+
8699
# Check if KB already exists
87100
if kb_path.exists():
88101
raise HTTPException(status_code=409, detail=f"Knowledge base '{kb_name}' already exists")

src/backend/tests/unit/test_knowledge_bases_api.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,128 @@ async def test_create_knowledge_base(
170170
data = response.json()
171171
assert data["name"] == "New KB"
172172

173+
@patch("langflow.api.v1.knowledge_bases.KBStorageHelper.get_root_path")
174+
async def test_create_kb_path_traversal_single_level(
175+
self, mock_root, client: AsyncClient, logged_in_headers, tmp_path
176+
):
177+
"""Single-level traversal '../victim_user/evil_kb' in POST must be blocked with 400/403.
178+
179+
VULNERABILITY: the create endpoint builds kb_path = kb_root_path / kb_user / kb_name
180+
without resolve() or is_relative_to(), so '../victim_user/evil_kb' escapes the user dir.
181+
"""
182+
mock_root.return_value = tmp_path
183+
(tmp_path / "activeuser").mkdir(parents=True)
184+
victim_dir = tmp_path / "victim_user" / "evil_kb"
185+
186+
response = await client.post(
187+
"api/v1/knowledge_bases",
188+
headers=logged_in_headers,
189+
json={
190+
"name": "../victim_user/evil_kb",
191+
"embedding_provider": "OpenAI",
192+
"embedding_model": "text-embedding-3-small",
193+
},
194+
)
195+
196+
assert response.status_code in (400, 403), (
197+
f"VULNERABILITY CONFIRMED: create endpoint accepted traversal payload with status {response.status_code}"
198+
)
199+
assert not victim_dir.exists(), (
200+
"VULNERABILITY CONFIRMED: path traversal created a directory outside the user's KB root"
201+
)
202+
203+
@patch("langflow.api.v1.knowledge_bases.KBStorageHelper.get_root_path")
204+
async def test_create_kb_path_traversal_absolute_path(
205+
self, mock_root, client: AsyncClient, logged_in_headers, tmp_path
206+
):
207+
"""Absolute path in kb_name must be blocked — e.g. '/tmp/evil'.
208+
209+
VULNERABILITY: kb_root_path / kb_user / '/tmp/evil' resolves to '/tmp/evil' in Python
210+
because Path drops all previous components when a segment starts with '/'.
211+
"""
212+
mock_root.return_value = tmp_path
213+
(tmp_path / "activeuser").mkdir(parents=True)
214+
evil_dir = tmp_path / "evil_absolute"
215+
216+
response = await client.post(
217+
"api/v1/knowledge_bases",
218+
headers=logged_in_headers,
219+
json={
220+
"name": str(evil_dir),
221+
"embedding_provider": "OpenAI",
222+
"embedding_model": "text-embedding-3-small",
223+
},
224+
)
225+
226+
assert response.status_code in (400, 403), (
227+
f"VULNERABILITY CONFIRMED: create endpoint accepted absolute path payload "
228+
f"with status {response.status_code}"
229+
)
230+
assert not evil_dir.exists(), (
231+
"VULNERABILITY CONFIRMED: absolute path in kb_name created a directory outside the KB root"
232+
)
233+
234+
@patch("langflow.api.v1.knowledge_bases.KBStorageHelper.get_root_path")
235+
async def test_create_kb_path_traversal_prefix_ambiguity(
236+
self, mock_root, client: AsyncClient, logged_in_headers, tmp_path
237+
):
238+
"""Prefix-ambiguity attack on create: user='activeuser', target dir='activeuser_evil'.
239+
240+
With startswith('/root/activeuser'), the path '/root/activeuser_evil/secret_kb'
241+
incorrectly passes because the string starts with '/root/activeuser'.
242+
is_relative_to() closes this gap and must block the request with 400/403.
243+
"""
244+
mock_root.return_value = tmp_path
245+
246+
(tmp_path / "activeuser").mkdir(parents=True)
247+
victim_kb = tmp_path / "activeuser_evil" / "secret_kb"
248+
victim_kb.mkdir(parents=True)
249+
250+
response = await client.post(
251+
"api/v1/knowledge_bases",
252+
headers=logged_in_headers,
253+
json={
254+
"name": "../activeuser_evil/secret_kb",
255+
"embedding_provider": "OpenAI",
256+
"embedding_model": "text-embedding-3-small",
257+
},
258+
)
259+
260+
assert response.status_code in (400, 403), (
261+
"VULNERABILITY CONFIRMED: prefix-ambiguity bypass succeeded on create endpoint — "
262+
"startswith() may still be in use instead of is_relative_to()"
263+
)
264+
assert not (tmp_path / "activeuser_evil" / "secret_kb_new").exists(), (
265+
"VULNERABILITY CONFIRMED: prefix-ambiguity attack created a directory outside the user's KB root"
266+
)
267+
268+
@patch("langflow.api.v1.knowledge_bases.logger.warning")
269+
@patch("langflow.api.v1.knowledge_bases.KBStorageHelper.get_root_path")
270+
async def test_create_kb_path_traversal_logs_warning(
271+
self, mock_root, mock_warning, client: AsyncClient, logged_in_headers, tmp_path
272+
):
273+
"""A traversal attempt on create must emit a warning log with user= and kb_name= context."""
274+
mock_root.return_value = tmp_path
275+
276+
(tmp_path / "activeuser").mkdir(parents=True)
277+
(tmp_path / "victim_user" / "secret_kb").mkdir(parents=True)
278+
279+
await client.post(
280+
"api/v1/knowledge_bases",
281+
headers=logged_in_headers,
282+
json={
283+
"name": "../victim_user/secret_kb",
284+
"embedding_provider": "OpenAI",
285+
"embedding_model": "text-embedding-3-small",
286+
},
287+
)
288+
289+
mock_warning.assert_called_once()
290+
warning_args = mock_warning.call_args[0]
291+
all_args_str = str(warning_args)
292+
assert "user=" in all_args_str, "Warning log must contain 'user=' in the format string"
293+
assert "kb_name=" in all_args_str, "Warning log must contain 'kb_name=' in the format string"
294+
173295
async def test_create_kb_name_too_short(self, client: AsyncClient, logged_in_headers):
174296
response = await client.post(
175297
"api/v1/knowledge_bases",

0 commit comments

Comments
 (0)