@@ -31,6 +31,9 @@ const {
3131 mockFsWriteFile,
3232 mockJoin,
3333 actualPath,
34+ mockFileExistsInWorkspace,
35+ mockListWorkspaceFiles,
36+ mockUploadWorkspaceFile,
3437} = vi . hoisted ( ( ) => {
3538 // eslint-disable-next-line @typescript-eslint/no-require-imports
3639 const actualPath = require ( 'path' ) as typeof import ( 'path' )
@@ -49,7 +52,7 @@ const {
4952 metadata : { pageCount : 1 } ,
5053 } ) ,
5154 mockFsAccess : vi . fn ( ) . mockResolvedValue ( undefined ) ,
52- mockFsStat : vi . fn ( ) . mockImplementation ( ( ) => ( { isFile : ( ) => true } ) ) ,
55+ mockFsStat : vi . fn ( ) . mockImplementation ( ( ) => ( { isFile : ( ) => true , size : 17 } ) ) ,
5356 mockFsReadFile : vi . fn ( ) . mockResolvedValue ( Buffer . from ( 'test file content' ) ) ,
5457 mockFsWriteFile : vi . fn ( ) . mockResolvedValue ( undefined ) ,
5558 mockJoin : vi . fn ( ( ...args : string [ ] ) : string => {
@@ -59,6 +62,9 @@ const {
5962 return actualPath . join ( ...args )
6063 } ) ,
6164 actualPath,
65+ mockFileExistsInWorkspace : vi . fn ( ) . mockResolvedValue ( false ) ,
66+ mockListWorkspaceFiles : vi . fn ( ) . mockResolvedValue ( [ ] ) ,
67+ mockUploadWorkspaceFile : vi . fn ( ) . mockResolvedValue ( { } ) ,
6268 }
6369} )
6470
@@ -104,6 +110,12 @@ vi.mock('@/lib/uploads/contexts/execution', () => ({
104110 uploadExecutionFile : vi . fn ( ) ,
105111} ) )
106112
// Replace the workspace upload-context module with the mock functions destructured
// from vi.hoisted above, so each test can control what these three exports resolve to.
113+ vi . mock ( '@/lib/uploads/contexts/workspace' , ( ) => ( {
114+ fileExistsInWorkspace : mockFileExistsInWorkspace ,
115+ listWorkspaceFiles : mockListWorkspaceFiles ,
116+ uploadWorkspaceFile : mockUploadWorkspaceFile ,
117+ } ) )
118+
107119vi . mock ( '@/lib/uploads/server/metadata' , ( ) => ( {
108120 getFileMetadataByKey : vi . fn ( ) ,
109121} ) )
@@ -175,7 +187,12 @@ describe('File Parse API Route', () => {
175187 permissionsMockFns . mockGetUserEntityPermissions . mockResolvedValue ( { canView : true } )
176188 storageServiceMockFns . mockHasCloudStorage . mockReturnValue ( true )
177189 storageServiceMockFns . mockDownloadFile . mockResolvedValue ( Buffer . from ( 'test file content' ) )
190+ mockFsStat . mockResolvedValue ( { isFile : ( ) => true , size : 17 } )
191+ mockFsReadFile . mockResolvedValue ( Buffer . from ( 'test file content' ) )
178192 mockIsSupportedFileType . mockReturnValue ( true )
193+ mockFileExistsInWorkspace . mockResolvedValue ( false )
194+ mockListWorkspaceFiles . mockResolvedValue ( [ ] )
195+ mockUploadWorkspaceFile . mockResolvedValue ( { } )
179196 mockParseFile . mockResolvedValue ( {
180197 content : 'parsed content' ,
181198 metadata : { pageCount : 1 } ,
@@ -311,6 +328,123 @@ describe('File Parse API Route', () => {
311328 expect ( data . results ) . toHaveLength ( 2 )
312329 } )
313330
// Multi-file request with two external URLs. The first parse result is 4 MiB of text,
// and the test asserts that the download cap passed to the second fetch is smaller
// than the cap passed to the first (5 MiB, then 1 MiB).
// NOTE(review): presumably the route subtracts parsed output size from a shared
// 5 MiB budget — confirm against the route implementation.
331+ it ( 'should cap remaining download size while processing multi-file parse results' , async ( ) => {
332+ inputValidationMockFns . mockValidateUrlWithDNS . mockResolvedValue ( {
333+ isValid : true ,
334+ resolvedIP : '203.0.113.10' ,
335+ } )
// NOTE(review): mockResolvedValue hands back this same Response instance for every
// fetch call — confirm the route tolerates that when it handles both URLs.
336+ inputValidationMockFns . mockSecureFetchWithPinnedIP . mockResolvedValue (
337+ new Response ( 'file content' , {
338+ status : 200 ,
339+ headers : { 'content-type' : 'text/plain' } ,
340+ } )
341+ )
342+
// First parse yields 4 * 1024 * 1024 characters; second parse yields a short string.
343+ const fourMbContent = 'a' . repeat ( 4 * 1024 * 1024 )
344+ mockParseBuffer
345+ . mockResolvedValueOnce ( {
346+ content : fourMbContent ,
347+ metadata : { pageCount : 1 } ,
348+ } )
349+ . mockResolvedValueOnce ( {
350+ content : 'second file' ,
351+ metadata : { pageCount : 1 } ,
352+ } )
353+
354+ const req = createMockRequest ( 'POST' , {
355+ filePath : [ 'https://example.com/file1.txt' , 'https://example.com/file2.txt' ] ,
356+ } )
357+
358+ const response = await POST ( req )
359+ const data = await response . json ( )
360+
361+ expect ( response . status ) . toBe ( 200 )
362+ expect ( data . results ) . toHaveLength ( 2 )
// First fetch is expected to receive a 5 MiB response cap.
363+ expect ( inputValidationMockFns . mockSecureFetchWithPinnedIP ) . toHaveBeenNthCalledWith (
364+ 1 ,
365+ 'https://example.com/file1.txt' ,
366+ '203.0.113.10' ,
367+ expect . objectContaining ( { maxResponseBytes : 5 * 1024 * 1024 } )
368+ )
// Second fetch is expected to receive only 1 MiB (5 MiB minus the 4 MiB already parsed).
369+ expect ( inputValidationMockFns . mockSecureFetchWithPinnedIP ) . toHaveBeenNthCalledWith (
370+ 2 ,
371+ 'https://example.com/file2.txt' ,
372+ '203.0.113.10' ,
373+ expect . objectContaining ( { maxResponseBytes : 1024 * 1024 } )
374+ )
375+ } )
// Multi-file request with a workspaceId where the workspace mocks report the second
// file as already present. The test asserts that the storage download for that file
// is called with the workspace key and a 1 MiB byte cap after a 4 MiB first result.
// NOTE(review): presumably the second URL is served from workspace storage instead
// of being fetched again — confirm against the route implementation.
377+ it ( 'should preserve the remaining multi-file cap when an external URL reuses a workspace file' , async ( ) => {
378+ inputValidationMockFns . mockValidateUrlWithDNS . mockResolvedValue ( {
379+ isValid : true ,
380+ resolvedIP : '203.0.113.10' ,
381+ } )
382+ inputValidationMockFns . mockSecureFetchWithPinnedIP . mockResolvedValue (
383+ new Response ( 'file content' , {
384+ status : 200 ,
385+ headers : { 'content-type' : 'text/plain' } ,
386+ } )
387+ )
// Existence check resolves false on its first call and true on its second.
388+ mockFileExistsInWorkspace . mockResolvedValueOnce ( false ) . mockResolvedValueOnce ( true )
// The workspace listing returns one entry whose key is asserted on below.
389+ mockListWorkspaceFiles . mockResolvedValueOnce ( [
390+ { name : 'file2.txt' , key : 'workspace-file2.txt' } ,
391+ ] )
392+
// First parse yields 4 * 1024 * 1024 characters; second parse yields a short string.
393+ mockParseBuffer
394+ . mockResolvedValueOnce ( {
395+ content : 'a' . repeat ( 4 * 1024 * 1024 ) ,
396+ metadata : { pageCount : 1 } ,
397+ } )
398+ . mockResolvedValueOnce ( {
399+ content : 'second file' ,
400+ metadata : { pageCount : 1 } ,
401+ } )
402+
403+ const req = createMockRequest ( 'POST' , {
404+ filePath : [ 'https://example.com/file1.txt' , 'https://example.com/file2.txt' ] ,
405+ workspaceId : 'workspace-id' ,
406+ } )
407+
408+ const response = await POST ( req )
409+ const data = await response . json ( )
410+
411+ expect ( response . status ) . toBe ( 200 )
412+ expect ( data . results ) . toHaveLength ( 2 )
// The storage download must target the listed workspace key with a 1 MiB cap.
413+ expect ( storageServiceMockFns . mockDownloadFile ) . toHaveBeenCalledWith (
414+ expect . objectContaining ( { key : 'workspace-file2.txt' , maxBytes : 1024 * 1024 } )
415+ )
416+ } )
// Multi-file request where the first parse result is one character over 5 MiB.
// The test asserts a 413 response with a 'too large' error, and that only one
// fetch was made (the second URL is never requested).
418+ it ( 'should stop multi-file parsing once the combined parsed output is too large' , async ( ) => {
419+ inputValidationMockFns . mockValidateUrlWithDNS . mockResolvedValue ( {
420+ isValid : true ,
421+ resolvedIP : '203.0.113.10' ,
422+ } )
423+ inputValidationMockFns . mockSecureFetchWithPinnedIP . mockResolvedValue (
424+ new Response ( 'file content' , {
425+ status : 200 ,
426+ headers : { 'content-type' : 'text/plain' } ,
427+ } )
428+ )
429+
// Only the first parse is stubbed here: 5 * 1024 * 1024 + 1 characters of content.
430+ mockParseBuffer . mockResolvedValueOnce ( {
431+ content : 'a' . repeat ( 5 * 1024 * 1024 + 1 ) ,
432+ metadata : { pageCount : 1 } ,
433+ } )
434+
435+ const req = createMockRequest ( 'POST' , {
436+ filePath : [ 'https://example.com/file1.txt' , 'https://example.com/file2.txt' ] ,
437+ } )
438+
439+ const response = await POST ( req )
440+ const data = await response . json ( )
441+
442+ expect ( response . status ) . toBe ( 413 )
443+ expect ( data . success ) . toBe ( false )
444+ expect ( data . error ) . toContain ( 'too large' )
// A single fetch call means processing stopped before the second URL.
445+ expect ( inputValidationMockFns . mockSecureFetchWithPinnedIP ) . toHaveBeenCalledTimes ( 1 )
446+ } )
314448 it ( 'should pass custom headers when fetching external URLs' , async ( ) => {
315449 inputValidationMockFns . mockValidateUrlWithDNS . mockResolvedValue ( {
316450 isValid : true ,
@@ -344,6 +478,58 @@ describe('File Parse API Route', () => {
344478 )
345479 } )
346480
// Single external URL whose mocked response advertises a content-length of
// 104857601 bytes, one more than the 104857600 (100 * 1024 * 1024) cap asserted below.
// The test expects HTTP 200 with success false and a 'too large' error in the body.
481+ it ( 'should reject oversized external downloads before reading the body' , async ( ) => {
482+ inputValidationMockFns . mockValidateUrlWithDNS . mockResolvedValue ( {
483+ isValid : true ,
484+ resolvedIP : '203.0.113.10' ,
485+ } )
// The actual body is the short string 'oversized'; only the header claims the large size.
486+ inputValidationMockFns . mockSecureFetchWithPinnedIP . mockResolvedValue (
487+ new Response ( 'oversized' , {
488+ status : 200 ,
489+ headers : { 'content-length' : '104857601' , 'content-type' : 'text/plain' } ,
490+ } )
491+ )
492+
493+ const req = createMockRequest ( 'POST' , {
494+ filePath : 'https://example.com/large.txt' ,
495+ } )
496+
497+ const response = await POST ( req )
498+ const data = await response . json ( )
499+
// The failure is reported in the payload (success: false), not via the status code.
500+ expect ( response . status ) . toBe ( 200 )
501+ expect ( data . success ) . toBe ( false )
502+ expect ( data . error ) . toContain ( 'too large' )
// The fetch must have been given the 100 MiB response cap.
503+ expect ( inputValidationMockFns . mockSecureFetchWithPinnedIP ) . toHaveBeenCalledWith (
504+ 'https://example.com/large.txt' ,
505+ '203.0.113.10' ,
506+ expect . objectContaining ( {
507+ maxResponseBytes : 104857600 ,
508+ } )
509+ )
510+ } )
// Local-storage setup where the mocked fs stat reports a size of 104857601 bytes
// (100 * 1024 * 1024 + 1). The test expects a 'too large' failure in the payload and
// that the mocked file read is never invoked.
512+ it ( 'should reject oversized local files before materializing them' , async ( ) => {
513+ setupFileApiMocks ( {
514+ cloudEnabled : false ,
515+ storageProvider : 'local' ,
516+ authenticated : true ,
517+ } )
// Only the reported size changes; isFile still returns true.
518+ mockFsStat . mockResolvedValue ( { isFile : ( ) => true , size : 104857601 } )
519+
520+ const req = createMockRequest ( 'POST' , {
521+ filePath : 'workspace/large.txt' ,
522+ } )
523+
524+ const response = await POST ( req )
525+ const data = await response . json ( )
526+
// The failure is reported in the payload (success: false), not via the status code.
527+ expect ( response . status ) . toBe ( 200 )
528+ expect ( data . success ) . toBe ( false )
529+ expect ( data . error ) . toContain ( 'too large' )
// The file contents must not have been read once the size check failed.
530+ expect ( mockFsReadFile ) . not . toHaveBeenCalled ( )
531+ } )
347533 it ( 'should process execution file URLs with context query param' , async ( ) => {
348534 setupFileApiMocks ( {
349535 cloudEnabled : true ,
0 commit comments