Skip to content

Commit 242f173

Browse files
authored
feat: support .dockerignore in project archiving (#48)
2 parents ca12b89 + bdd7715 commit 242f173

8 files changed

Lines changed: 725 additions & 166 deletions

File tree

.gitignore

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,4 +182,12 @@ local_test/
182182
# deprecated files
183183
*deprecated*
184184

185-
.trae/
185+
.trae/
186+
187+
188+
# Agent Coders
189+
AGENTS.md
190+
AGENTKIT_CODEBASE_GUIDE.md
191+
.opencode/
192+
.specify/
193+
.claude/

agentkit/toolkit/builders/ve_pipeline.py

Lines changed: 132 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import logging
1717
import tempfile
1818
import uuid
19+
import sys
1920
from pathlib import Path
2021
from typing import Dict, Any, Optional
2122
from dataclasses import dataclass, field
@@ -666,7 +667,8 @@ def _create_project_archive(self, config: VeCPCRBuilderConfig) -> str:
666667
"""
667668
try:
668669
from agentkit.toolkit.volcengine.utils.project_archiver import (
669-
create_project_archive,
670+
ArchiveConfig,
671+
ProjectArchiver,
670672
)
671673

672674
common_config = config.common_config
@@ -678,12 +680,54 @@ def _create_project_archive(self, config: VeCPCRBuilderConfig) -> str:
678680
temp_dir = tempfile.mkdtemp()
679681
source_base_path = self.workdir
680682

681-
# Create archive using project archiver utility
682-
archive_path = create_project_archive(
683+
dockerignore_path = source_base_path / ".dockerignore"
684+
dockerignore_path_str = (
685+
str(dockerignore_path) if dockerignore_path.is_file() else None
686+
)
687+
688+
archive_config = ArchiveConfig(
683689
source_dir=str(source_base_path),
684690
output_dir=temp_dir,
685691
archive_name=archive_name,
692+
dockerignore_path=dockerignore_path_str,
686693
)
694+
archiver = ProjectArchiver(archive_config)
695+
files_to_include = archiver.collect_files_to_include()
696+
697+
size_threshold_bytes = 100 * 1024 * 1024
698+
total_size_bytes = 0
699+
included_files_with_size: list[tuple[str, int]] = []
700+
for p in files_to_include:
701+
try:
702+
rel = p.relative_to(source_base_path).as_posix()
703+
size = p.stat().st_size
704+
except Exception:
705+
continue
706+
total_size_bytes += size
707+
included_files_with_size.append((rel, size))
708+
709+
if total_size_bytes > size_threshold_bytes:
710+
self._warn_large_archive(
711+
total_size_bytes=total_size_bytes,
712+
threshold_bytes=size_threshold_bytes,
713+
included_files_with_size=included_files_with_size,
714+
)
715+
716+
if sys.stdin.isatty():
717+
confirmed = self.reporter.confirm(
718+
message=(
719+
"The archive to be uploaded is larger than 100 MiB. "
720+
"Do you want to continue uploading all included files?"
721+
),
722+
default=False,
723+
)
724+
if not confirmed:
725+
raise Exception(
726+
"Archive upload cancelled by user (archive size exceeds 100 MiB)."
727+
)
728+
729+
# Create archive using pre-collected file list (avoid re-walking directories)
730+
archive_path = archiver.create_archive(files_to_include=files_to_include)
687731

688732
logger.info(f"Project archive created: {archive_path}")
689733
self.reporter.success(f"Project archive created: {archive_path}")
@@ -692,6 +736,91 @@ def _create_project_archive(self, config: VeCPCRBuilderConfig) -> str:
692736
except Exception as e:
693737
raise Exception(f"Failed to create project archive: {str(e)}")
694738

739+
def _warn_large_archive(
    self,
    total_size_bytes: int,
    threshold_bytes: int,
    included_files_with_size: list[tuple[str, int]],
) -> None:
    """Warn about an oversized archive and show a breakdown of its contents.

    Emits a one-line summary through ``self.reporter.warning`` and then a
    detailed report through ``self.reporter.show_logs`` consisting of:
    top-level paths by total size, the largest individual files, a
    lexicographic sample of included files, and hints on using
    ``.dockerignore`` to exclude files.

    Args:
        total_size_bytes: Sum of the sizes of all included files, in bytes.
        threshold_bytes: Size limit that triggered this warning, in bytes.
        included_files_with_size: ``(relative_posix_path, size_in_bytes)``
            pairs, one per included file.
    """

    def format_bytes(num_bytes: int) -> str:
        """Render a byte count with binary units (B, KiB, MiB, GiB, TiB)."""
        value = float(num_bytes)
        for unit in ("B", "KiB", "MiB", "GiB"):
            if value < 1024:
                if unit == "B":
                    # Plain byte counts are shown without a fractional part.
                    return f"{int(value)} {unit}"
                return f"{value:.2f} {unit}"
            value /= 1024
        # Anything that did not fit a smaller unit is reported in TiB.
        return f"{value:.2f} TiB"

    included_count = len(included_files_with_size)
    size_str = format_bytes(total_size_bytes)
    threshold_str = format_bytes(threshold_bytes)

    self.reporter.warning(
        f"Archive size is {size_str} (threshold: {threshold_str}). Included files: {included_count}."
    )

    # Top-level folder summary (by total size). Files that live directly in
    # the project root are keyed by their own name; anything nested is
    # aggregated under "<first path component>/".
    top_level_sizes: dict[str, int] = {}
    for rel_path, size in included_files_with_size:
        head, sep, _rest = rel_path.partition("/")
        key = f"{head}/" if sep and head else head
        top_level_sizes[key] = top_level_sizes.get(key, 0) + size

    top_level_lines = ["Top-level paths by total size (descending):"]
    top_level_lines.extend(
        f" {format_bytes(size):>10} {path}"
        for path, size in sorted(
            top_level_sizes.items(), key=lambda kv: kv[1], reverse=True
        )[:30]
    )

    # Largest files list
    largest_lines = ["Largest files (descending):"]
    largest_lines.extend(
        f" {format_bytes(size):>10} {rel_path}"
        for rel_path, size in sorted(
            included_files_with_size, key=lambda kv: kv[1], reverse=True
        )[:50]
    )

    # Sample file list (lexicographic). The header reports how many entries
    # are actually listed, so it never claims "first 200" of a smaller set.
    sample_limit = 200
    shown_count = min(sample_limit, included_count)
    sample_lines = [f"Included files (first {shown_count} of {included_count}):"]
    sample_lines.extend(
        f" {rel_path}"
        for rel_path, _size in sorted(
            included_files_with_size, key=lambda kv: kv[0]
        )[:sample_limit]
    )
    if included_count > sample_limit:
        sample_lines.append(
            f" ... ({included_count - sample_limit} more files not shown)"
        )

    hint_lines = [
        "How to exclude files from the upload:",
        " 1) Edit .dockerignore in your project root.",
        " 2) Add ignore rules, for example:",
        " - data/",
        " - *.log",
        " - .venv/",
        " - **/*.bin",
        " 3) Re-run agentkit build/launch.",
    ]

    # Sections are separated by a single blank line in the final report.
    lines = [
        *top_level_lines,
        "",
        *largest_lines,
        "",
        *sample_lines,
        "",
        *hint_lines,
    ]

    self.reporter.show_logs(
        title="Large archive detected (review before upload)",
        lines=lines,
        max_lines=400,
    )
823+
695824
def _upload_to_tos(
696825
self, archive_path: str, config: VeCPCRBuilderConfig
697826
) -> tuple[str, str]:

agentkit/toolkit/docker/utils.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,38 @@ def create_dockerignore_file(
5858
"*.py[cod]",
5959
"*$py.class",
6060
"",
61+
"# Test / lint caches",
62+
".pytest_cache/",
63+
".mypy_cache/",
64+
".ruff_cache/",
65+
".tox/",
66+
".nox/",
67+
"",
68+
"# Coverage",
69+
".coverage",
70+
"coverage.xml",
71+
"htmlcov/",
72+
"",
73+
"# Logs / temp",
74+
"*.log",
75+
"*.tmp",
76+
"*.swp",
77+
"*.swo",
78+
"",
79+
"# Build outputs",
80+
"dist/",
81+
"build/",
82+
"*.egg-info/",
83+
".eggs/",
84+
"",
85+
"# OS junk",
86+
".DS_Store",
87+
"Thumbs.db",
88+
"",
89+
"# Environment files",
90+
".env",
91+
".env.*",
92+
"",
6193
"# Virtual environments",
6294
".venv/",
6395
"venv/",

0 commit comments

Comments
 (0)