Skip to content

Commit 49b2adc

Browse files
phernandez, github-actions[bot], and claude
authored
fix: skip archive files during cloud upload (#420)
Signed-off-by: phernandez <paul@basicmachines.co> Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Paul Hernandez <phernandez@users.noreply.github.com>
1 parent 1646572 commit 49b2adc

2 files changed

Lines changed: 85 additions & 2 deletions

File tree

src/basic_memory/cli/commands/cloud/upload.py

Lines changed: 36 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,9 @@
1010
from basic_memory.mcp.async_client import get_client
1111
from basic_memory.mcp.tools.utils import call_put
1212

13+
# Archive file extensions skipped during upload. Matching is done on the
# lower-cased final suffix only, so "data.tar.gz" is caught via ".gz".
14+
ARCHIVE_EXTENSIONS = {
    # Standalone archive formats
    ".zip",
    ".7z",
    ".rar",
    # Tar containers and their shorthand compressed suffixes
    ".tar",
    ".tgz",
    ".tbz2",
    # Compression wrappers (also the final suffix of names like "x.tar.gz")
    ".gz",
    ".bz2",
    ".xz",
}
15+
1316

1417
async def upload_path(
1518
local_path: Path,
@@ -61,11 +64,18 @@ async def upload_path(
6164

6265
# Calculate total size
6366
total_bytes = sum(file_path.stat().st_size for file_path, _ in files_to_upload)
67+
skipped_count = 0
6468

6569
# If dry run, just show what would be uploaded
6670
if dry_run:
6771
print("\nFiles that would be uploaded:")
6872
for file_path, relative_path in files_to_upload:
73+
# Skip archive files
74+
if _is_archive_file(file_path):
75+
print(f" [SKIP] {relative_path} (archive file)")
76+
skipped_count += 1
77+
continue
78+
6979
size = file_path.stat().st_size
7080
if size < 1024:
7181
size_str = f"{size} bytes"
@@ -78,6 +88,12 @@ async def upload_path(
7888
# Upload files using httpx
7989
async with get_client() as client:
8090
for i, (file_path, relative_path) in enumerate(files_to_upload, 1):
91+
# Skip archive files (zip, tar, gz, etc.)
92+
if _is_archive_file(file_path):
93+
print(f"Skipping archive file: {relative_path} ({i}/{len(files_to_upload)})")
94+
skipped_count += 1
95+
continue
96+
8197
# Build remote path: /webdav/{project_name}/{relative_path}
8298
remote_path = f"/webdav/{project_name}/{relative_path}"
8399
print(f"Uploading {relative_path} ({i}/{len(files_to_upload)})")
@@ -105,10 +121,15 @@ async def upload_path(
105121
else:
106122
size_str = f"{total_bytes / (1024 * 1024):.1f} MB"
107123

124+
uploaded_count = len(files_to_upload) - skipped_count
108125
if dry_run:
109-
print(f"\nTotal: {len(files_to_upload)} file(s) ({size_str})")
126+
print(f"\nTotal: {uploaded_count} file(s) ({size_str})")
127+
if skipped_count > 0:
128+
print(f" Would skip {skipped_count} archive file(s)")
110129
else:
111-
print(f"Upload complete: {len(files_to_upload)} file(s) ({size_str})")
130+
print(f"✓ Upload complete: {uploaded_count} file(s) ({size_str})")
131+
if skipped_count > 0:
132+
print(f" Skipped {skipped_count} archive file(s)")
112133

113134
return True
114135

@@ -120,6 +141,19 @@ async def upload_path(
120141
return False
121142

122143

144+
def _is_archive_file(file_path: Path) -> bool:
    """Report whether ``file_path`` names an archive, judged by extension only.

    Only the final suffix of the path is considered; it is lower-cased and
    looked up in ``ARCHIVE_EXTENSIONS``. The file's contents are not inspected.

    Args:
        file_path: Path of the candidate file.

    Returns:
        True when the lower-cased final suffix is a known archive extension,
        False otherwise.
    """
    extension = file_path.suffix
    normalized = extension.lower()
    return normalized in ARCHIVE_EXTENSIONS
155+
156+
123157
def _get_files_to_upload(
124158
directory: Path, verbose: bool = False, use_gitignore: bool = True
125159
) -> list[tuple[Path, str]]:

tests/cli/test_upload.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -327,6 +327,55 @@ async def test_builds_correct_webdav_path(self, tmp_path):
327327
call_args = mock_put.call_args
328328
assert call_args[0][1] == "/webdav/my-project/subdir/file.txt"
329329

330+
@pytest.mark.asyncio
async def test_skips_archive_files(self, tmp_path, capsys):
    """Test that archive files are skipped during upload.

    Three files are offered for upload: one markdown note and two archives,
    one of which has a compound extension (``data.tar.gz``). Only the note
    may reach ``call_put``; both archives must be reported as skipped and
    excluded from the uploaded-file count in the summary.
    """
    # Create test files including archives
    (tmp_path / "notes.md").write_text("content")
    (tmp_path / "backup.zip").write_text("fake zip")
    (tmp_path / "data.tar.gz").write_text("fake tar")

    mock_client = AsyncMock()
    mock_response = Mock()
    mock_response.raise_for_status = Mock()

    with patch("basic_memory.cli.commands.cloud.upload.get_client") as mock_get_client:
        with patch("basic_memory.cli.commands.cloud.upload.call_put") as mock_put:
            with patch(
                "basic_memory.cli.commands.cloud.upload._get_files_to_upload"
            ) as mock_get_files:
                with patch("aiofiles.open", create=True) as mock_aiofiles_open:
                    # get_client() is used as an async context manager
                    mock_get_client.return_value.__aenter__.return_value = mock_client
                    mock_get_client.return_value.__aexit__.return_value = None
                    mock_put.return_value = mock_response

                    # Mock file listing with all files, so the skip logic in
                    # upload_path (not the file discovery) is what is tested
                    mock_get_files.return_value = [
                        (tmp_path / "notes.md", "notes.md"),
                        (tmp_path / "backup.zip", "backup.zip"),
                        (tmp_path / "data.tar.gz", "data.tar.gz"),
                    ]

                    mock_file = AsyncMock()
                    mock_file.read.return_value = b"content"
                    mock_aiofiles_open.return_value.__aenter__.return_value = mock_file

                    result = await upload_path(tmp_path, "test-project")

    # Should succeed
    assert result is True

    # Should only upload the .md file (not the archives)
    assert mock_put.call_count == 1
    call_args = mock_put.call_args
    assert call_args[0][1] == "/webdav/test-project/notes.md"

    # Check output mentions skipping each archive, including the
    # compound-extension one that is matched through its final ".gz" suffix
    captured = capsys.readouterr()
    assert "Skipping archive file" in captured.out
    assert "backup.zip" in captured.out
    assert "data.tar.gz" in captured.out
    assert "Skipped 2 archive file(s)" in captured.out

    # The summary must count only the file that was actually uploaded
    assert "Upload complete: 1 file(s)" in captured.out
378+
330379
def test_no_gitignore_skips_gitignore_patterns(self, tmp_path):
331380
"""Test that --no-gitignore flag skips .gitignore patterns."""
332381
# Create test files

0 commit comments

Comments
 (0)