From 9f747449fb31c8e92ffea3616357c8cd49d6dcb2 Mon Sep 17 00:00:00 2001
From: Shen-YuFei <shenyufei0415@outlook.com>
Date: Fri, 24 Apr 2026 21:39:11 +0800
Subject: [PATCH 1/4] feat(globus): parallel HTTPS download and checksum TSV
 parsing

---
 Dockerfile                                |   2 +
 pridepy/files/files.py                    | 153 ++++++++++++++--------
 pridepy/tests/test_download_resilience.py |  21 ++-
 3 files changed, 112 insertions(+), 64 deletions(-)
 create mode 100644 Dockerfile
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..cae8514
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,2 @@
+FROM ghcr.io/bigbio/pridepy:0.0.13
+COPY pridepy/ /usr/local/lib/python3.11/site-packages/pridepy/
diff --git a/pridepy/files/files.py b/pridepy/files/files.py
index 18f63bc..2663967 100644
--- a/pridepy/files/files.py
+++ b/pridepy/files/files.py
@@ -10,6 +10,7 @@
 import urllib
 import urllib.request
 import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from ftplib import FTP
 from typing import Dict, List, Optional, Tuple
 import socket
@@ -68,57 +69,50 @@ def __init__(self):
         pass
 
     @staticmethod
-    def _parse_checksum_line(line: str) -> Optional[Tuple[str, str]]:
-        """
-        Parse one checksum line and return (file_basename, md5_checksum) when present.
-        Supports common formats:
-          - <md5> <path>
-          - <path>\t<md5>
-          - <md5>\t<path>
-        """
-        clean = line.strip()
-        if not clean or clean.startswith("#"):
-            return None
-
-        tokens = clean.replace("\t", " ").split()
-        if len(tokens) < 2:
-            return None
-
-        checksum = None
-        path_token = None
-        for idx, token in enumerate(tokens):
-            normalized = token.lower()
-            if len(normalized) == 32 and all(c in "0123456789abcdef" for c in normalized):
-                checksum = normalized
-                remaining = [t for i, t in enumerate(tokens) if i != idx]
-                if remaining:
-                    path_token = remaining[-1]
-                break
-
-        if not checksum or not path_token:
-            return None
-
-        file_name = os.path.basename(path_token.lstrip("*"))
-        if not file_name:
-            return None
-        return file_name, checksum
+    def _find_tsv_columns(header: str) -> Optional[Tuple[int, int]]:
+        """Return (name_idx, checksum_idx) from a TSV header, or None."""
+        cols = header.split("\t")
+        name_idx = checksum_idx = None
+        for i, col in enumerate(cols):
+            low = col.lower()
+            if "file" in low and "name" in low:
+                name_idx = i
+            elif "checksum" in low:
+                checksum_idx = i
+        if name_idx is not None and checksum_idx is not None:
+            return name_idx, checksum_idx
+        return None
 
     @staticmethod
     def read_checksum_file(checksum_file_path: str) -> Dict[str, str]:
         """
-        Read checksum TSV/TXT and build {file_name: md5} map.
+        Read PRIDE API checksum TSV and build {file_name: md5} map.
+        Expected format: File-Name\tFile-MD5Checksum\tFile-Size
         """
         checksums: Dict[str, str] = {}
         if not checksum_file_path or not os.path.exists(checksum_file_path):
             return checksums
 
-        with open(checksum_file_path, "r", encoding="utf-8") as checksum_file:
-            for line in checksum_file:
-                parsed = Files._parse_checksum_line(line)
-                if parsed is None:
-                    continue
-                file_name, checksum = parsed
-                checksums[file_name] = checksum
+        with open(checksum_file_path, "r", encoding="utf-8") as f:
+            lines = f.readlines()
+
+        if len(lines) < 2:
+            return checksums
+
+        col_indices = Files._find_tsv_columns(lines[0].strip())
+        if col_indices is None:
+            logging.warning(f"Unrecognized checksum file format: {lines[0].strip()}")
+            return checksums
+
+        name_idx, checksum_idx = col_indices
+        min_cols = max(name_idx, checksum_idx) + 1
+        for line in lines[1:]:
+            parts = line.strip().split("\t")
+            if len(parts) >= min_cols:
+                fn = os.path.basename(parts[name_idx].strip())
+                cs = parts[checksum_idx].strip().lower()
+                if fn and cs:
+                    checksums[fn] = cs
 
         return checksums
 
@@ -485,6 +479,63 @@ def download_files_from_aspera(
             except subprocess.CalledProcessError as e:
                 logging.error(f"Aspera download failed for {new_file_path}: {str(e)}")
 
+    @staticmethod
+    def _download_range(url, file_path, start, end, pbar):
+        """Download a byte range directly into the target file using seek."""
+        session = Util.create_session_with_retries()
+        headers = {"Range": f"bytes={start}-{end}"}
+        with session.get(url, headers=headers, stream=True, timeout=(30, 300)) as r:
+            r.raise_for_status()
+            with open(file_path, "r+b") as f:
+                f.seek(start)
+                for chunk in r.iter_content(chunk_size=8 * 1024 * 1024):
+                    if chunk:
+                        f.write(chunk)
+                        pbar.update(len(chunk))
+
+    @staticmethod
+    def _parallel_download(url, file_path, num_connections=8):
+        """Download a file using parallel Range requests, like browser parallel downloading."""
+        session = Util.create_session_with_retries()
+        head = session.head(url, timeout=(30, 30))
+        total_size = int(head.headers.get("content-length", 0))
+        accept_ranges = head.headers.get("accept-ranges", "none")
+
+        if total_size == 0 or accept_ranges != "bytes":
+            logging.info("Server does not support Range requests, falling back to single connection")
+            with session.get(url, stream=True, timeout=(30, 300)) as r:
+                r.raise_for_status()
+                with tqdm(total=total_size, unit="B", unit_scale=True, desc=file_path) as pbar:
+                    with open(file_path, "wb", buffering=8 * 1024 * 1024) as f:
+                        for chunk in r.iter_content(chunk_size=8 * 1024 * 1024):
+                            if chunk:
+                                f.write(chunk)
+                                pbar.update(len(chunk))
+            return
+
+        chunk_size = total_size // num_connections
+        ranges = []
+        for i in range(num_connections):
+            start = i * chunk_size
+            end = total_size - 1 if i == num_connections - 1 else (i + 1) * chunk_size - 1
+            ranges.append((start, end))
+
+        logging.info(f"Parallel download: {num_connections} connections, {total_size / 1024 / 1024:.0f}MB total")
+
+        with open(file_path, "wb") as f:
+            f.seek(total_size - 1)
+            f.write(b"\0")
+
+        with tqdm(total=total_size, unit="B", unit_scale=True, desc=file_path) as pbar:
+            with ThreadPoolExecutor(max_workers=num_connections) as executor:
+                futures = []
+                for i, (start, end) in enumerate(ranges):
+                    f = executor.submit(Files._download_range, url, file_path, start, end, pbar)
+                    futures.append(f)
+
+                for f in as_completed(futures):
+                    f.result()
+
     @staticmethod
     def download_files_from_globus(
         file_list_json: List[Dict], output_folder, skip_if_downloaded_already
@@ -508,7 +559,7 @@ def download_files_from_globus(
 
                 logging.debug(f"Downloading from Globus: {download_url}")
                 download_url = download_url.replace(
-                    Files.PRIDE_ARCHIVE_FTP_URL_PREFIX, Files.GLOBUS_BASE_URL
+                    "ftp://", "https://"
                 )
 
                 # Create a clean filename to save the downloaded file
@@ -518,21 +569,7 @@ def download_files_from_globus(
                     logging.info("Skipping download as file already exists")
                     continue
 
-                # Get total file size for progress tracking
-                with urllib.request.urlopen(download_url) as response:
-                    total_size = int(response.headers.get("Content-Length", 0))
-
-                # Initialize progress bar
-                progress = Progress(total_size, new_file_path)
-
-                # Download the file with progress bar
-                urllib.request.urlretrieve(
-                    download_url,
-                    new_file_path,
-                    reporthook=lambda blocks, block_size, total_size: progress(block_size),
-                )
-
-                progress.close()
+                Files._parallel_download(download_url, new_file_path)
                 logging.info(f"Successfully downloaded {new_file_path}")
 
             except Exception as e:
diff --git a/pridepy/tests/test_download_resilience.py b/pridepy/tests/test_download_resilience.py
index 46bbcd5..994cfe7 100644
--- a/pridepy/tests/test_download_resilience.py
+++ b/pridepy/tests/test_download_resilience.py
@@ -8,19 +8,28 @@
 
 
 class TestDownloadResilience(TestCase):
-    def test_read_checksum_file_parses_common_formats(self):
+    def test_read_checksum_file_parses_pride_api_format(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
             checksum_path = os.path.join(tmp_dir, "checksums.tsv")
             with open(checksum_path, "w", encoding="utf-8") as handle:
-                handle.write("900150983cd24fb0d6963f7d28e17f72 fileA.raw\n")
-                handle.write("fileB.raw\t900150983cd24fb0d6963f7d28e17f72\n")
-                handle.write("900150983cd24fb0d6963f7d28e17f72\t/path/to/fileC.raw\n")
+                handle.write("File-Name\tFile-MD5Checksum\tFile-Size\n")
+                handle.write("fileA.raw\t900150983cd24fb0d6963f7d28e17f72\t1024\n")
+                handle.write("fileB.raw\t800150983cd24fb0d6963f7d28e17f72\t2048\n")
 
             checksum_map = Files.read_checksum_file(checksum_path)
 
             assert checksum_map["fileA.raw"] == "900150983cd24fb0d6963f7d28e17f72"
-            assert checksum_map["fileB.raw"] == "900150983cd24fb0d6963f7d28e17f72"
-            assert checksum_map["fileC.raw"] == "900150983cd24fb0d6963f7d28e17f72"
+            assert checksum_map["fileB.raw"] == "800150983cd24fb0d6963f7d28e17f72"
+
+    def test_read_checksum_file_returns_empty_on_bad_header(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            checksum_path = os.path.join(tmp_dir, "checksums.tsv")
+            with open(checksum_path, "w", encoding="utf-8") as handle:
+                handle.write("random header\n")
+                handle.write("some data\n")
+
+            checksum_map = Files.read_checksum_file(checksum_path)
+            assert len(checksum_map) == 0
 
     def test_validate_download_rejects_empty_and_bad_checksum(self):
         with tempfile.TemporaryDirectory() as tmp_dir:

From 7958d9a6619400f7e0a14323b7c6d2c088e39880 Mon Sep 17 00:00:00 2001
From: Shen-YuFei <shenyufei0415@outlook.com>
Date: Sat, 25 Apr 2026 14:48:52 +0800
Subject: [PATCH 2/4] git commit -m "fix(globus): update files docker tests and
 version"

---
 Dockerfile                                |  13 +-
 pridepy/files/files.py                    | 141 +++++++++++++---------
 pridepy/tests/test_download_resilience.py |  55 ++++++++-
 pyproject.toml                            |   2 +-
 4 files changed, 147 insertions(+), 64 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index cae8514..fba4ec2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,2 +1,11 @@
-FROM ghcr.io/bigbio/pridepy:0.0.13
-COPY pridepy/ /usr/local/lib/python3.11/site-packages/pridepy/
+FROM python:3.11-slim-bookworm
+
+WORKDIR /src
+COPY pyproject.toml README.md LICENSE ./
+COPY pridepy ./pridepy
+
+RUN pip install --no-cache-dir --upgrade pip \
+ && pip install --no-cache-dir . \
+ && rm -rf /src
+
+WORKDIR /data
diff --git a/pridepy/files/files.py b/pridepy/files/files.py
index 2663967..a329826 100644
--- a/pridepy/files/files.py
+++ b/pridepy/files/files.py
@@ -59,7 +59,7 @@ class Files:
     API_PRIVATE_URL = "https://www.ebi.ac.uk/pride/private/ws/archive/v2"
     PRIDE_ARCHIVE_FTP = "ftp.pride.ebi.ac.uk"
     PRIDE_ARCHIVE_FTP_URL_PREFIX = "ftp://ftp.pride.ebi.ac.uk/"
-    GLOBUS_BASE_URL = "https://g-a8b222.dd271.03c0.data.globus.org/"
+    PRIDE_ARCHIVE_HTTPS_URL_PREFIX = "https://ftp.pride.ebi.ac.uk/"
     S3_URL = "https://hh.fire.sdo.ebi.ac.uk"
     S3_BUCKET = "pride-public"
     PROTOCOL_ORDER = ["aspera", "s3", "ftp", "globus"]
@@ -71,17 +71,15 @@ def __init__(self):
     @staticmethod
     def _find_tsv_columns(header: str) -> Optional[Tuple[int, int]]:
         """Return (name_idx, checksum_idx) from a TSV header, or None."""
-        cols = header.split("\t")
-        name_idx = checksum_idx = None
-        for i, col in enumerate(cols):
-            low = col.lower()
-            if "file" in low and "name" in low:
-                name_idx = i
-            elif "checksum" in low:
-                checksum_idx = i
-        if name_idx is not None and checksum_idx is not None:
-            return name_idx, checksum_idx
-        return None
+        cols = [col.strip().lower() for col in header.split("\t")]
+        required_cols = {"file-name", "file-md5checksum", "file-size"}
+        if not required_cols.issubset(set(cols)):
+            return None
+        return cols.index("file-name"), cols.index("file-md5checksum")
+
+    @staticmethod
+    def _is_md5_checksum(value: str) -> bool:
+        return len(value) == 32 and all(char in "0123456789abcdef" for char in value)
 
     @staticmethod
     def read_checksum_file(checksum_file_path: str) -> Dict[str, str]:
@@ -94,25 +92,24 @@ def read_checksum_file(checksum_file_path: str) -> Dict[str, str]:
             return checksums
 
         with open(checksum_file_path, "r", encoding="utf-8") as f:
-            lines = f.readlines()
-
-        if len(lines) < 2:
-            return checksums
-
-        col_indices = Files._find_tsv_columns(lines[0].strip())
-        if col_indices is None:
-            logging.warning(f"Unrecognized checksum file format: {lines[0].strip()}")
-            return checksums
-
-        name_idx, checksum_idx = col_indices
-        min_cols = max(name_idx, checksum_idx) + 1
-        for line in lines[1:]:
-            parts = line.strip().split("\t")
-            if len(parts) >= min_cols:
-                fn = os.path.basename(parts[name_idx].strip())
-                cs = parts[checksum_idx].strip().lower()
-                if fn and cs:
-                    checksums[fn] = cs
+            header = f.readline().strip()
+            if not header:
+                return checksums
+
+            col_indices = Files._find_tsv_columns(header)
+            if col_indices is None:
+                logging.warning(f"Unrecognized checksum file format: {header}")
+                return checksums
+
+            name_idx, checksum_idx = col_indices
+            min_cols = max(name_idx, checksum_idx) + 1
+            for line in f:
+                parts = line.strip().split("\t")
+                if len(parts) >= min_cols:
+                    fn = os.path.basename(parts[name_idx].strip())
+                    cs = parts[checksum_idx].strip().lower()
+                    if fn and Files._is_md5_checksum(cs):
+                        checksums[fn] = cs
 
         return checksums
 
@@ -192,7 +189,11 @@ def _get_download_url(file_record: Dict, protocol: str) -> str:
         if protocol == "ftp":
             return ftp_url
         if protocol == "globus":
-            return ftp_url.replace(Files.PRIDE_ARCHIVE_FTP_URL_PREFIX, Files.GLOBUS_BASE_URL)
+            return ftp_url.replace(
+                Files.PRIDE_ARCHIVE_FTP_URL_PREFIX,
+                Files.PRIDE_ARCHIVE_HTTPS_URL_PREFIX,
+                1,
+            )
         if protocol == "s3":
             return ftp_url
         raise ValueError(f"Unsupported protocol: {protocol}")
@@ -486,6 +487,11 @@ def _download_range(url, file_path, start, end, pbar):
         headers = {"Range": f"bytes={start}-{end}"}
         with session.get(url, headers=headers, stream=True, timeout=(30, 300)) as r:
             r.raise_for_status()
+            if r.status_code != 206:
+                raise RuntimeError(f"Server did not honor Range request: {r.status_code}")
+            content_range = r.headers.get("Content-Range", "")
+            if not content_range.lower().startswith(f"bytes {start}-{end}/"):
+                raise RuntimeError(f"Unexpected Content-Range header: {content_range}")
             with open(file_path, "r+b") as f:
                 f.seek(start)
                 for chunk in r.iter_content(chunk_size=8 * 1024 * 1024):
@@ -497,12 +503,20 @@ def _download_range(url, file_path, start, end, pbar):
     def _parallel_download(url, file_path, num_connections=8):
         """Download a file using parallel Range requests, like browser parallel downloading."""
         session = Util.create_session_with_retries()
-        head = session.head(url, timeout=(30, 30))
-        total_size = int(head.headers.get("content-length", 0))
-        accept_ranges = head.headers.get("accept-ranges", "none")
-
-        if total_size == 0 or accept_ranges != "bytes":
-            logging.info("Server does not support Range requests, falling back to single connection")
+        try:
+            head = session.head(url, timeout=(30, 30))
+            head.raise_for_status()
+            total_size = int(head.headers.get("content-length", 0))
+            accept_ranges = head.headers.get("accept-ranges", "none").strip().lower()
+        except (requests.RequestException, ValueError) as exc:
+            logging.info(f"HEAD request failed, falling back to single connection: {exc}")
+            total_size = 0
+            accept_ranges = "none"
+
+        if total_size == 0 or accept_ranges != "bytes" or num_connections < 2:
+            logging.info(
+                "Server does not support Range requests, falling back to single connection"
+            )
             with session.get(url, stream=True, timeout=(30, 300)) as r:
                 r.raise_for_status()
                 with tqdm(total=total_size, unit="B", unit_scale=True, desc=file_path) as pbar:
@@ -513,28 +527,45 @@ def _parallel_download(url, file_path, num_connections=8):
                                 pbar.update(len(chunk))
             return
 
-        chunk_size = total_size // num_connections
+        num_connections = min(num_connections, total_size)
+        chunk_size = (total_size + num_connections - 1) // num_connections
         ranges = []
-        for i in range(num_connections):
-            start = i * chunk_size
-            end = total_size - 1 if i == num_connections - 1 else (i + 1) * chunk_size - 1
+        for start in range(0, total_size, chunk_size):
+            end = min(start + chunk_size - 1, total_size - 1)
             ranges.append((start, end))
 
-        logging.info(f"Parallel download: {num_connections} connections, {total_size / 1024 / 1024:.0f}MB total")
+        logging.info(
+            f"Parallel download: {num_connections} connections, "
+            f"{total_size / 1024 / 1024:.0f}MB total"
+        )
 
         with open(file_path, "wb") as f:
             f.seek(total_size - 1)
             f.write(b"\0")
 
-        with tqdm(total=total_size, unit="B", unit_scale=True, desc=file_path) as pbar:
-            with ThreadPoolExecutor(max_workers=num_connections) as executor:
-                futures = []
-                for i, (start, end) in enumerate(ranges):
-                    f = executor.submit(Files._download_range, url, file_path, start, end, pbar)
-                    futures.append(f)
+        try:
+            with tqdm(total=total_size, unit="B", unit_scale=True, desc=file_path) as pbar:
+                with ThreadPoolExecutor(max_workers=num_connections) as executor:
+                    futures = []
+                    for start, end in ranges:
+                        f = executor.submit(
+                            Files._download_range, url, file_path, start, end, pbar
+                        )
+                        futures.append(f)
 
-                for f in as_completed(futures):
-                    f.result()
+                    for f in as_completed(futures):
+                        f.result()
+        except Exception as exc:
+            logging.info(f"Parallel download failed, retrying with single connection: {exc}")
+            Files._remove_if_exists(file_path)
+            with session.get(url, stream=True, timeout=(30, 300)) as r:
+                r.raise_for_status()
+                with tqdm(total=total_size, unit="B", unit_scale=True, desc=file_path) as pbar:
+                    with open(file_path, "wb", buffering=8 * 1024 * 1024) as f:
+                        for chunk in r.iter_content(chunk_size=8 * 1024 * 1024):
+                            if chunk:
+                                f.write(chunk)
+                                pbar.update(len(chunk))
 
     @staticmethod
     def download_files_from_globus(
@@ -552,15 +583,9 @@ def download_files_from_globus(
 
         for file in file_list_json:
             try:
-                if file["publicFileLocations"][0]["name"] == "FTP Protocol":
-                    download_url = file["publicFileLocations"][0]["value"]
-                else:
-                    download_url = file["publicFileLocations"][1]["value"]
+                download_url = Files._get_download_url(file, "globus")
 
                 logging.debug(f"Downloading from Globus: {download_url}")
-                download_url = download_url.replace(
-                    "ftp://", "https://"
-                )
 
                 # Create a clean filename to save the downloaded file
                 new_file_path = Files.get_output_file_name(download_url, file, output_folder)
diff --git a/pridepy/tests/test_download_resilience.py b/pridepy/tests/test_download_resilience.py
index 994cfe7..5f5bf84 100644
--- a/pridepy/tests/test_download_resilience.py
+++ b/pridepy/tests/test_download_resilience.py
@@ -2,7 +2,7 @@
 import os
 import tempfile
 from unittest import TestCase
-from unittest.mock import patch
+from unittest.mock import Mock, patch
 
 from pridepy.files.files import Files
 
@@ -14,12 +14,14 @@ def test_read_checksum_file_parses_pride_api_format(self):
             with open(checksum_path, "w", encoding="utf-8") as handle:
                 handle.write("File-Name\tFile-MD5Checksum\tFile-Size\n")
                 handle.write("fileA.raw\t900150983cd24fb0d6963f7d28e17f72\t1024\n")
-                handle.write("fileB.raw\t800150983cd24fb0d6963f7d28e17f72\t2048\n")
+                handle.write("fileB.raw\td41d8cd98f00b204e9800998ecf8427e\t2048\n")
+                handle.write("fileC.raw\tnot-a-md5\t4096\n")
 
             checksum_map = Files.read_checksum_file(checksum_path)
 
             assert checksum_map["fileA.raw"] == "900150983cd24fb0d6963f7d28e17f72"
-            assert checksum_map["fileB.raw"] == "800150983cd24fb0d6963f7d28e17f72"
+            assert checksum_map["fileB.raw"] == "d41d8cd98f00b204e9800998ecf8427e"
+            assert "fileC.raw" not in checksum_map
 
     def test_read_checksum_file_returns_empty_on_bad_header(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -31,6 +33,53 @@ def test_read_checksum_file_returns_empty_on_bad_header(self):
             checksum_map = Files.read_checksum_file(checksum_path)
             assert len(checksum_map) == 0
 
+    def test_get_download_url_maps_globus_to_pride_archive_https(self):
+        file_record = {
+            "publicFileLocations": [
+                {"name": "FTP Protocol", "value": "ftp://ftp.pride.ebi.ac.uk/path/file.raw"}
+            ]
+        }
+
+        download_url = Files._get_download_url(file_record, "globus")
+
+        assert download_url == "https://ftp.pride.ebi.ac.uk/path/file.raw"
+
+    def test_parallel_download_falls_back_when_range_not_honored(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            output_file = os.path.join(tmp_dir, "file.raw")
+            session = Mock()
+            head = Mock()
+            head.headers = {"content-length": "1", "accept-ranges": "bytes"}
+            head.raise_for_status.return_value = None
+            session.head.return_value = head
+
+            ranged_response = Mock()
+            ranged_response.status_code = 200
+            ranged_response.headers = {}
+            ranged_response.raise_for_status.return_value = None
+            ranged_response.__enter__ = Mock(return_value=ranged_response)
+            ranged_response.__exit__ = Mock(return_value=None)
+
+            fallback_response = Mock()
+            fallback_response.raise_for_status.return_value = None
+            fallback_response.iter_content.return_value = [b"abc"]
+            fallback_response.__enter__ = Mock(return_value=fallback_response)
+            fallback_response.__exit__ = Mock(return_value=None)
+            session.get.side_effect = [ranged_response, fallback_response]
+
+            with patch(
+                "pridepy.files.files.Util.create_session_with_retries",
+                return_value=session,
+            ):
+                Files._parallel_download(
+                    "https://example.org/file.raw",
+                    output_file,
+                    num_connections=2,
+                )
+
+            with open(output_file, "rb") as handle:
+                assert handle.read() == b"abc"
+
     def test_validate_download_rejects_empty_and_bad_checksum(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
             file_path = os.path.join(tmp_dir, "test.raw")
diff --git a/pyproject.toml b/pyproject.toml
index 080ecd8..64bd396 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pridepy"
-version = "0.0.13"
+version = "0.0.14"
 description = "Python Client library for PRIDE Rest API"
 readme = "README.md"
 requires-python = ">=3.9"

From 0bde5cffc08966614feb94fd99fdf9fe006ef9f5 Mon Sep 17 00:00:00 2001
From: Shen-YuFei <shenyufei0415@outlook.com>
Date: Sat, 25 Apr 2026 14:56:26 +0800
Subject: [PATCH 3/4] test(globus): add HEAD failure and accept-ranges fallback
 tests

---
 pridepy/tests/test_download_resilience.py | 55 +++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/pridepy/tests/test_download_resilience.py b/pridepy/tests/test_download_resilience.py
index 5f5bf84..a5efae1 100644
--- a/pridepy/tests/test_download_resilience.py
+++ b/pridepy/tests/test_download_resilience.py
@@ -80,6 +80,61 @@ def test_parallel_download_falls_back_when_range_not_honored(self):
             with open(output_file, "rb") as handle:
                 assert handle.read() == b"abc"
 
+    def test_parallel_download_falls_back_when_head_fails(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            output_file = os.path.join(tmp_dir, "file.raw")
+            session = Mock()
+            session.head.side_effect = ValueError("bad content length")
+
+            fallback_response = Mock()
+            fallback_response.raise_for_status.return_value = None
+            fallback_response.iter_content.return_value = [b"abc"]
+            fallback_response.__enter__ = Mock(return_value=fallback_response)
+            fallback_response.__exit__ = Mock(return_value=None)
+            session.get.return_value = fallback_response
+
+            with patch(
+                "pridepy.files.files.Util.create_session_with_retries",
+                return_value=session,
+            ):
+                Files._parallel_download(
+                    "https://example.org/file.raw",
+                    output_file,
+                    num_connections=2,
+                )
+
+            with open(output_file, "rb") as handle:
+                assert handle.read() == b"abc"
+
+    def test_parallel_download_falls_back_without_accept_ranges(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            output_file = os.path.join(tmp_dir, "file.raw")
+            session = Mock()
+            head = Mock()
+            head.headers = {"content-length": "3", "accept-ranges": "none"}
+            head.raise_for_status.return_value = None
+            session.head.return_value = head
+
+            fallback_response = Mock()
+            fallback_response.raise_for_status.return_value = None
+            fallback_response.iter_content.return_value = [b"abc"]
+            fallback_response.__enter__ = Mock(return_value=fallback_response)
+            fallback_response.__exit__ = Mock(return_value=None)
+            session.get.return_value = fallback_response
+
+            with patch(
+                "pridepy.files.files.Util.create_session_with_retries",
+                return_value=session,
+            ):
+                Files._parallel_download(
+                    "https://example.org/file.raw",
+                    output_file,
+                    num_connections=2,
+                )
+
+            with open(output_file, "rb") as handle:
+                assert handle.read() == b"abc"
+
     def test_validate_download_rejects_empty_and_bad_checksum(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
             file_path = os.path.join(tmp_dir, "test.raw")

From 374748fa01aa4aafbb7378eb771136ebcf77a244 Mon Sep 17 00:00:00 2001
From: Shen-YuFei <shenyufei0415@outlook.com>
Date: Sat, 25 Apr 2026 18:15:44 +0800
Subject: [PATCH 4/4] chore(pridepy): remove Dockerfile

---
 Dockerfile | 11 -----------
 README.md  |  8 --------
 2 files changed, 19 deletions(-)
 delete mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index fba4ec2..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,11 +0,0 @@
-FROM python:3.11-slim-bookworm
-
-WORKDIR /src
-COPY pyproject.toml README.md LICENSE ./
-COPY pridepy ./pridepy
-
-RUN pip install --no-cache-dir --upgrade pip \
- && pip install --no-cache-dir . \
- && rm -rf /src
-
-WORKDIR /data
diff --git a/README.md b/README.md
index 275ab15..f37bb75 100644
--- a/README.md
+++ b/README.md
@@ -222,14 +222,6 @@ uv build
 
 A white paper is available in [paper/paper.md](paper/paper.md).
 
-Build PDF with pandoc:
-
-```bash
-docker run --rm --platform linux/amd64 \
-  -v "$(pwd)/paper:/data" \
-  -w /data openjournals/inara:latest paper.md -p -o pdf
-```
-
 ## Contributing
 
 1. Fork the repository