1616import logging
1717import tempfile
1818import uuid
19+ import sys
1920from pathlib import Path
2021from typing import Dict , Any , Optional
2122from dataclasses import dataclass , field
@@ -666,7 +667,8 @@ def _create_project_archive(self, config: VeCPCRBuilderConfig) -> str:
666667 """
667668 try :
668669 from agentkit .toolkit .volcengine .utils .project_archiver import (
669- create_project_archive ,
670+ ArchiveConfig ,
671+ ProjectArchiver ,
670672 )
671673
672674 common_config = config .common_config
@@ -678,12 +680,54 @@ def _create_project_archive(self, config: VeCPCRBuilderConfig) -> str:
678680 temp_dir = tempfile .mkdtemp ()
679681 source_base_path = self .workdir
680682
681- # Create archive using project archiver utility
682- archive_path = create_project_archive (
683+ dockerignore_path = source_base_path / ".dockerignore"
684+ dockerignore_path_str = (
685+ str (dockerignore_path ) if dockerignore_path .is_file () else None
686+ )
687+
688+ archive_config = ArchiveConfig (
683689 source_dir = str (source_base_path ),
684690 output_dir = temp_dir ,
685691 archive_name = archive_name ,
692+ dockerignore_path = dockerignore_path_str ,
686693 )
694+ archiver = ProjectArchiver (archive_config )
695+ files_to_include = archiver .collect_files_to_include ()
696+
697+ size_threshold_bytes = 100 * 1024 * 1024
698+ total_size_bytes = 0
699+ included_files_with_size : list [tuple [str , int ]] = []
700+ for p in files_to_include :
701+ try :
702+ rel = p .relative_to (source_base_path ).as_posix ()
703+ size = p .stat ().st_size
704+ except Exception :
705+ continue
706+ total_size_bytes += size
707+ included_files_with_size .append ((rel , size ))
708+
709+ if total_size_bytes > size_threshold_bytes :
710+ self ._warn_large_archive (
711+ total_size_bytes = total_size_bytes ,
712+ threshold_bytes = size_threshold_bytes ,
713+ included_files_with_size = included_files_with_size ,
714+ )
715+
716+ if sys .stdin .isatty ():
717+ confirmed = self .reporter .confirm (
718+ message = (
719+ "The archive to be uploaded is larger than 100 MiB. "
720+ "Do you want to continue uploading all included files?"
721+ ),
722+ default = False ,
723+ )
724+ if not confirmed :
725+ raise Exception (
726+ "Archive upload cancelled by user (archive size exceeds 100 MiB)."
727+ )
728+
729+ # Create archive using pre-collected file list (avoid re-walking directories)
730+ archive_path = archiver .create_archive (files_to_include = files_to_include )
687731
688732 logger .info (f"Project archive created: { archive_path } " )
689733 self .reporter .success (f"Project archive created: { archive_path } " )
@@ -692,6 +736,91 @@ def _create_project_archive(self, config: VeCPCRBuilderConfig) -> str:
692736 except Exception as e :
693737 raise Exception (f"Failed to create project archive: { str (e )} " )
694738
def _warn_large_archive(
    self,
    total_size_bytes: int,
    threshold_bytes: int,
    included_files_with_size: list[tuple[str, int]],
) -> None:
    """Report a size breakdown for an archive that exceeds the size threshold.

    Sends a one-line summary to ``self.reporter.warning`` and then a
    multi-section report to ``self.reporter.show_logs`` containing:

    * the 30 largest top-level paths by cumulative size,
    * the 50 largest individual files,
    * the first 200 included files in lexicographic order,
    * a short hint on excluding files via ``.dockerignore``.

    Args:
        total_size_bytes: Combined size of all included files, in bytes.
        threshold_bytes: Size limit that triggered this report, in bytes.
        included_files_with_size: ``(relative_path, size_in_bytes)`` pairs.
            Paths are split on ``"/"`` to determine their top-level entry.
    """
    top_level_limit = 30
    largest_limit = 50
    sample_limit = 200

    def format_bytes(num_bytes: int) -> str:
        """Render a byte count using binary units, capped at TiB."""
        units = ("B", "KiB", "MiB", "GiB", "TiB")
        value = float(num_bytes)
        # Fall back to the largest unit when the value never drops below 1024.
        unit = units[-1]
        for candidate in units[:-1]:
            if value < 1024:
                unit = candidate
                break
            value /= 1024
        if unit == "B":
            # Plain byte counts are shown as integers, without decimals.
            return f"{int(value)} {unit} "
        return f"{value:.2f} {unit} "

    included_count = len(included_files_with_size)
    size_str = format_bytes(total_size_bytes)
    threshold_str = format_bytes(threshold_bytes)

    self.reporter.warning(
        f"Archive size is {size_str} (threshold: {threshold_str} ). Included files: {included_count} ."
    )

    # Aggregate sizes per top-level entry. Directories get a "/"-style suffix;
    # files that live directly in the project root are keyed by their own name.
    top_level_sizes: dict[str, int] = {}
    for rel_path, size in included_files_with_size:
        top, separator, _rest = rel_path.partition("/")
        key = f"{top} /" if separator and top else top
        top_level_sizes[key] = top_level_sizes.get(key, 0) + size

    by_size_desc = sorted(
        top_level_sizes.items(), key=lambda kv: kv[1], reverse=True
    )
    top_level_lines = ["Top-level paths by total size (descending):"]
    top_level_lines.extend(
        f" {format_bytes(size):>10} {path} "
        for path, size in by_size_desc[:top_level_limit]
    )

    files_by_size_desc = sorted(
        included_files_with_size, key=lambda kv: kv[1], reverse=True
    )
    largest_lines = ["Largest files (descending):"]
    largest_lines.extend(
        f" {format_bytes(size):>10} {rel_path} "
        for rel_path, size in files_by_size_desc[:largest_limit]
    )

    # The header states how many files are actually listed, so a small
    # archive is not described as "first 200 of 3".
    shown_count = min(sample_limit, included_count)
    files_by_name = sorted(included_files_with_size, key=lambda kv: kv[0])
    sample_lines = [f"Included files (first {shown_count} of {included_count} ):"]
    sample_lines.extend(
        f" {rel_path} " for rel_path, _size in files_by_name[:sample_limit]
    )
    if included_count > sample_limit:
        sample_lines.append(
            f" ... ({included_count - sample_limit} more files not shown)"
        )

    hint_lines = [
        "How to exclude files from the upload:",
        " 1) Edit .dockerignore in your project root.",
        " 2) Add ignore rules, for example:",
        " - data/",
        " - *.log",
        " - .venv/",
        " - **/*.bin",
        " 3) Re-run agentkit build/launch.",
    ]

    # Sections are separated by a single empty line.
    lines = [
        *top_level_lines,
        "",
        *largest_lines,
        "",
        *sample_lines,
        "",
        *hint_lines,
    ]

    self.reporter.show_logs(
        title="Large archive detected (review before upload)",
        lines=lines,
        max_lines=400,
    )
823+
695824 def _upload_to_tos (
696825 self , archive_path : str , config : VeCPCRBuilderConfig
697826 ) -> tuple [str , str ]:
0 commit comments