Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 224 additions & 1 deletion apps/sim/app/api/files/parse/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ const {
mockFsWriteFile,
mockJoin,
actualPath,
mockFileExistsInWorkspace,
mockListWorkspaceFiles,
mockUploadWorkspaceFile,
} = vi.hoisted(() => {
// eslint-disable-next-line @typescript-eslint/no-require-imports
const actualPath = require('path') as typeof import('path')
Expand All @@ -49,7 +52,7 @@ const {
metadata: { pageCount: 1 },
}),
mockFsAccess: vi.fn().mockResolvedValue(undefined),
mockFsStat: vi.fn().mockImplementation(() => ({ isFile: () => true })),
mockFsStat: vi.fn().mockImplementation(() => ({ isFile: () => true, size: 17 })),
mockFsReadFile: vi.fn().mockResolvedValue(Buffer.from('test file content')),
mockFsWriteFile: vi.fn().mockResolvedValue(undefined),
mockJoin: vi.fn((...args: string[]): string => {
Expand All @@ -59,6 +62,9 @@ const {
return actualPath.join(...args)
}),
actualPath,
mockFileExistsInWorkspace: vi.fn().mockResolvedValue(false),
mockListWorkspaceFiles: vi.fn().mockResolvedValue([]),
mockUploadWorkspaceFile: vi.fn().mockResolvedValue({}),
}
})

Expand Down Expand Up @@ -104,6 +110,12 @@ vi.mock('@/lib/uploads/contexts/execution', () => ({
uploadExecutionFile: vi.fn(),
}))

// Swap the workspace upload context for the hoisted mock functions so that
// individual tests can script existence checks, listings and uploads.
vi.mock('@/lib/uploads/contexts/workspace', () => {
  return {
    fileExistsInWorkspace: mockFileExistsInWorkspace,
    listWorkspaceFiles: mockListWorkspaceFiles,
    uploadWorkspaceFile: mockUploadWorkspaceFile,
  }
})

// Stub the metadata lookup with a bare mock function; no return value is configured here.
vi.mock('@/lib/uploads/server/metadata', () => {
  return {
    getFileMetadataByKey: vi.fn(),
  }
})
Expand Down Expand Up @@ -175,7 +187,12 @@ describe('File Parse API Route', () => {
permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue({ canView: true })
storageServiceMockFns.mockHasCloudStorage.mockReturnValue(true)
storageServiceMockFns.mockDownloadFile.mockResolvedValue(Buffer.from('test file content'))
mockFsStat.mockResolvedValue({ isFile: () => true, size: 17 })
mockFsReadFile.mockResolvedValue(Buffer.from('test file content'))
mockIsSupportedFileType.mockReturnValue(true)
mockFileExistsInWorkspace.mockResolvedValue(false)
mockListWorkspaceFiles.mockResolvedValue([])
mockUploadWorkspaceFile.mockResolvedValue({})
mockParseFile.mockResolvedValue({
content: 'parsed content',
metadata: { pageCount: 1 },
Expand Down Expand Up @@ -311,6 +328,160 @@ describe('File Parse API Route', () => {
expect(data.results).toHaveLength(2)
})

/**
 * Multi-file parse of two external URLs.
 *
 * The first parsed result is 4 MiB, and the assertions require the first fetch to be
 * capped at 5 MiB and the second at 1 MiB, i.e. the download limit for later files
 * shrinks by what has already been parsed.
 */
it('should cap remaining download size while processing multi-file parse results', async () => {
  const ONE_MIB = 1024 * 1024
  const pinnedIp = '203.0.113.10'
  const firstUrl = 'https://example.com/file1.txt'
  const secondUrl = 'https://example.com/file2.txt'

  inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({
    isValid: true,
    resolvedIP: pinnedIp,
  })
  // A Response body can only be consumed once. This test performs two fetches, so
  // build a fresh Response per call instead of sharing one instance between them.
  inputValidationMockFns.mockSecureFetchWithPinnedIP.mockImplementation(
    async () =>
      new Response('file content', {
        status: 200,
        headers: { 'content-type': 'text/plain' },
      })
  )

  // First file consumes 4 MiB of the budget; the second is tiny.
  const fourMbContent = 'a'.repeat(4 * ONE_MIB)
  mockParseBuffer
    .mockResolvedValueOnce({
      content: fourMbContent,
      metadata: { pageCount: 1 },
    })
    .mockResolvedValueOnce({
      content: 'second file',
      metadata: { pageCount: 1 },
    })

  const req = createMockRequest('POST', {
    filePath: [firstUrl, secondUrl],
  })

  const response = await POST(req)
  const data = await response.json()

  expect(response.status).toBe(200)
  expect(data.results).toHaveLength(2)
  // Full budget for the first download...
  expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenNthCalledWith(
    1,
    firstUrl,
    pinnedIp,
    expect.objectContaining({ maxResponseBytes: 5 * ONE_MIB })
  )
  // ...and only the remainder for the second one.
  expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenNthCalledWith(
    2,
    secondUrl,
    pinnedIp,
    expect.objectContaining({ maxResponseBytes: ONE_MIB })
  )
})

/**
 * Two external URLs with a workspace id. The workspace existence check is scripted to
 * answer false, then true, and the listing returns an entry for `file2.txt`.
 * The assertion requires the storage download of that workspace key to be limited
 * to 1 MiB after the first file parsed to 4 MiB.
 */
it('should preserve the remaining multi-file cap when an external URL reuses a workspace file', async () => {
  const oneMib = 1024 * 1024
  const urls = ['https://example.com/file1.txt', 'https://example.com/file2.txt']

  inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({
    isValid: true,
    resolvedIP: '203.0.113.10',
  })
  const upstream = new Response('file content', {
    status: 200,
    headers: { 'content-type': 'text/plain' },
  })
  inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue(upstream)

  // First existence check misses, second one hits.
  mockFileExistsInWorkspace.mockResolvedValueOnce(false)
  mockFileExistsInWorkspace.mockResolvedValueOnce(true)
  const existingWorkspaceFile = { name: 'file2.txt', key: 'workspace-file2.txt' }
  mockListWorkspaceFiles.mockResolvedValueOnce([existingWorkspaceFile])

  const largeFirstResult = {
    content: 'a'.repeat(4 * oneMib),
    metadata: { pageCount: 1 },
  }
  const smallSecondResult = {
    content: 'second file',
    metadata: { pageCount: 1 },
  }
  mockParseBuffer.mockResolvedValueOnce(largeFirstResult)
  mockParseBuffer.mockResolvedValueOnce(smallSecondResult)

  const request = createMockRequest('POST', {
    filePath: urls,
    workspaceId: 'workspace-id',
  })

  const res = await POST(request)
  const payload = await res.json()

  expect(res.status).toBe(200)
  expect(payload.results).toHaveLength(2)
  expect(storageServiceMockFns.mockDownloadFile).toHaveBeenCalledWith(
    expect.objectContaining({ key: 'workspace-file2.txt', maxBytes: oneMib })
  )
})

/**
 * The first of two files parses to one byte more than 5 MiB. The request is expected
 * to fail with 413 and a "too large" error, and only one fetch should have happened.
 */
it('should stop multi-file parsing once the combined parsed output is too large', async () => {
  const justOverFiveMib = 5 * 1024 * 1024 + 1

  inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({
    isValid: true,
    resolvedIP: '203.0.113.10',
  })
  const upstream = new Response('file content', {
    status: 200,
    headers: { 'content-type': 'text/plain' },
  })
  inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue(upstream)

  mockParseBuffer.mockResolvedValueOnce({
    content: 'a'.repeat(justOverFiveMib),
    metadata: { pageCount: 1 },
  })

  const request = createMockRequest('POST', {
    filePath: ['https://example.com/file1.txt', 'https://example.com/file2.txt'],
  })

  const res = await POST(request)
  const payload = await res.json()

  expect(res.status).toBe(413)
  expect(payload.success).toBe(false)
  expect(payload.error).toContain('too large')
  // The second URL must never be requested.
  expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledTimes(1)
})

/**
 * Two external URLs where the first parses to a small result and the second to 5 MiB.
 *
 * The request is expected to fail with 413 / "too large" while still returning the
 * first file's successful result, and both URLs should have been fetched.
 */
it('should include successful multi-file parse results when a later file exceeds the cap', async () => {
  inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({
    isValid: true,
    resolvedIP: '203.0.113.10',
  })
  // Both URLs are fetched in this test. A Response body can only be consumed once,
  // so return a new Response per call rather than sharing a single instance.
  inputValidationMockFns.mockSecureFetchWithPinnedIP.mockImplementation(
    async () =>
      new Response('file content', {
        status: 200,
        headers: { 'content-type': 'text/plain' },
      })
  )

  mockParseBuffer
    .mockResolvedValueOnce({
      content: 'first file',
      metadata: { pageCount: 1 },
    })
    .mockResolvedValueOnce({
      // Together with the first result this pushes the combined output past 5 MiB.
      content: 'a'.repeat(5 * 1024 * 1024),
      metadata: { pageCount: 1 },
    })

  const req = createMockRequest('POST', {
    filePath: ['https://example.com/file1.txt', 'https://example.com/file2.txt'],
  })

  const response = await POST(req)
  const data = await response.json()

  expect(response.status).toBe(413)
  expect(data.success).toBe(false)
  expect(data.error).toContain('too large')
  // Only the first file's result is reported back.
  expect(data.results).toHaveLength(1)
  expect(data.results[0].output.content).toBe('first file')
  expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledTimes(2)
})

it('should pass custom headers when fetching external URLs', async () => {
inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({
isValid: true,
Expand Down Expand Up @@ -344,6 +515,58 @@ describe('File Parse API Route', () => {
)
})

/**
 * Single external URL whose response advertises a content-length one byte above
 * 104857600. The test expects a 200 response carrying `success: false` with a
 * "too large" error, and the fetch to have been issued with
 * `maxResponseBytes: 104857600`.
 */
it('should reject oversized external downloads before reading the body', async () => {
  const targetUrl = 'https://example.com/large.txt'
  const pinnedIp = '203.0.113.10'
  const expectedLimit = 104857600

  inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({
    isValid: true,
    resolvedIP: pinnedIp,
  })
  const oversizedResponse = new Response('oversized', {
    status: 200,
    headers: { 'content-length': '104857601', 'content-type': 'text/plain' },
  })
  inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue(oversizedResponse)

  const request = createMockRequest('POST', { filePath: targetUrl })

  const res = await POST(request)
  const payload = await res.json()

  expect(res.status).toBe(200)
  expect(payload.success).toBe(false)
  expect(payload.error).toContain('too large')
  expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledWith(
    targetUrl,
    pinnedIp,
    expect.objectContaining({ maxResponseBytes: expectedLimit })
  )
})

/**
 * Local-storage variant: `stat` reports a file of 104857601 bytes. The test expects
 * a 200 response carrying `success: false` with a "too large" error, and the file
 * contents never to be read.
 */
it('should reject oversized local files before materializing them', async () => {
  const localStorageSetup = {
    cloudEnabled: false,
    storageProvider: 'local',
    authenticated: true,
  }
  setupFileApiMocks(localStorageSetup)

  const oversizedStat = { isFile: () => true, size: 104857601 }
  mockFsStat.mockResolvedValue(oversizedStat)

  const request = createMockRequest('POST', { filePath: 'workspace/large.txt' })

  const res = await POST(request)
  const payload = await res.json()

  expect(res.status).toBe(200)
  expect(payload.success).toBe(false)
  expect(payload.error).toContain('too large')
  // The size check must short-circuit before any read happens.
  expect(mockFsReadFile).not.toHaveBeenCalled()
})

it('should process execution file URLs with context query param', async () => {
setupFileApiMocks({
cloudEnabled: true,
Expand Down
Loading
Loading