Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 220 additions & 0 deletions .github/workflows/publish_extract_worker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
# Release workflow for the extract-worker package and its Docker images.
name: Publish extract-worker

on:
  # Manual runs.
  workflow_dispatch:
  # Pushes of tags whose name starts with `extract-worker-`.
  push:
    tags: ['extract-worker-*']

jobs:
create-release:
  # Only cut a release when the run targets a tag: on a manual dispatch from
  # a branch, github.ref_name is the branch name and would otherwise be used
  # as the release tag.
  if: github.ref_type == 'tag'
  runs-on: ubuntu-latest
  permissions:
    # `gh release create` writes a release to this repository.
    contents: write
  steps:
    - uses: actions/checkout@v6
    - name: Create GH release
      # Release notes are generated by GitHub (`--generate-notes`).
      run: gh release create "$tag" --generate-notes
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        tag: ${{ github.ref_name }}

publish-extract-worker-to-pypi:
  runs-on: ubuntu-latest
  permissions:
    # Declaring any permission sets the unlisted ones to `none`, so read
    # access for the checkout is granted explicitly.
    contents: read
    # Lets the job request an OIDC token; presumably consumed by `uv publish`
    # (no PyPI API token is configured in this job).
    id-token: write
  environment:
    name: pypi
    url: https://pypi.org/p/datashare-extract-worker
  env:
    # Same tool versions as the extract-worker test workflow, quoted so YAML
    # keeps them as strings.
    PYTHON_VERSION: '3.12'
    ASTRAL_VERSION: '0.11.6'
  steps:
    - uses: actions/checkout@v6
    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        version: ${{ env.ASTRAL_VERSION }}
    - name: Install Python
      run: uv python install "$PYTHON_VERSION"
    - name: Build
      working-directory: workers/extract-worker
      run: uv build
    - name: Publish
      working-directory: workers/extract-worker
      run: uv publish

publish-io-worker:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    # Image tag = the part of the git ref captured after `extract-worker-`.
    - id: meta
      name: Docker meta
      uses: docker/metadata-action@v6
      with:
        images: icij/datashare-extract-io-worker
        tags: 'type=match,pattern=extract-worker-(.*),group=1'

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4

    # The DOCKERHUB_USERNAME and DOCKERHUB_TOKEN secrets must be configured.
    - name: Login to Docker Hub
      uses: docker/login-action@v4
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    # Builds the `io-worker` Dockerfile stage and pushes it.
    - name: Build and push image
      uses: docker/build-push-action@v7
      with:
        context: ./workers/extract-worker
        target: io-worker
        platforms: linux/amd64
        push: true
        labels: ${{ steps.meta.outputs.labels }}
        tags: ${{ steps.meta.outputs.tags }}

publish-extract-cpu-worker:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    # Image tag = the part of the git ref captured after `extract-worker-`.
    - id: meta
      name: Docker meta
      uses: docker/metadata-action@v6
      with:
        images: icij/datashare-extract-cpu-worker
        tags: 'type=match,pattern=extract-worker-(.*),group=1'

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4

    # The DOCKERHUB_USERNAME and DOCKERHUB_TOKEN secrets must be configured.
    - name: Login to Docker Hub
      uses: docker/login-action@v4
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    # Builds the `extract-cpu-worker` Dockerfile stage and pushes it.
    - name: Build and push image
      uses: docker/build-push-action@v7
      with:
        context: ./workers/extract-worker
        target: extract-cpu-worker
        platforms: linux/amd64
        push: true
        labels: ${{ steps.meta.outputs.labels }}
        tags: ${{ steps.meta.outputs.tags }}

publish-extract-gpu-worker:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    # Image tag = the part of the git ref captured after `extract-worker-`.
    - id: meta
      name: Docker meta
      uses: docker/metadata-action@v6
      with:
        images: icij/datashare-extract-gpu-worker
        tags: 'type=match,pattern=extract-worker-(.*),group=1'

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4

    # The DOCKERHUB_USERNAME and DOCKERHUB_TOKEN secrets must be configured.
    - name: Login to Docker Hub
      uses: docker/login-action@v4
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    # Builds the `extract-gpu-worker` Dockerfile stage and pushes it.
    - name: Build and push image
      uses: docker/build-push-action@v7
      with:
        context: ./workers/extract-worker
        target: extract-gpu-worker
        platforms: linux/amd64
        push: true
        labels: ${{ steps.meta.outputs.labels }}
        tags: ${{ steps.meta.outputs.tags }}

publish-extract-cpu-mineru-worker:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    # Image tag = the part of the git ref captured after `extract-worker-`.
    - id: meta
      name: Docker meta
      uses: docker/metadata-action@v6
      with:
        images: icij/datashare-extract-cpu-mineru-worker
        tags: 'type=match,pattern=extract-worker-(.*),group=1'

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4

    # The DOCKERHUB_USERNAME and DOCKERHUB_TOKEN secrets must be configured.
    - name: Login to Docker Hub
      uses: docker/login-action@v4
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    # Builds the `extract-cpu-mineru-worker` Dockerfile stage and pushes it.
    - name: Build and push image
      uses: docker/build-push-action@v7
      with:
        context: ./workers/extract-worker
        target: extract-cpu-mineru-worker
        platforms: linux/amd64
        push: true
        labels: ${{ steps.meta.outputs.labels }}
        tags: ${{ steps.meta.outputs.tags }}

publish-extract-gpu-mineru-worker:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    # Image tag = the part of the git ref captured after `extract-worker-`.
    - id: meta
      name: Docker meta
      uses: docker/metadata-action@v6
      with:
        images: icij/datashare-extract-gpu-mineru-worker
        tags: 'type=match,pattern=extract-worker-(.*),group=1'

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4

    # The DOCKERHUB_USERNAME and DOCKERHUB_TOKEN secrets must be configured.
    - name: Login to Docker Hub
      uses: docker/login-action@v4
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    # Builds the `extract-gpu-mineru-worker` Dockerfile stage and pushes it.
    - name: Build and push image
      uses: docker/build-push-action@v7
      with:
        context: ./workers/extract-worker
        target: extract-gpu-mineru-worker
        platforms: linux/amd64
        push: true
        labels: ${{ steps.meta.outputs.labels }}
        tags: ${{ steps.meta.outputs.tags }}

trigger-workflows-worker-publication:
  # Runs only once the package has been published to PyPI.
  needs:
    - publish-extract-worker-to-pypi
  runs-on: ubuntu-latest
  permissions:
    # `gh workflow run` creates a workflow_dispatch event, which requires
    # write access to Actions for the workflow token.
    actions: write
    # Read access for the checkout.
    contents: read
  steps:
    - uses: actions/checkout@v6
    - name: Trigger workflow worker release
      run: |
        # Strip the leading tag prefix so only the version is forwarded
        # (a prefix strip, not a replace-anywhere substitution).
        package_version="${package_version#extract-worker-}"
        gh workflow run .github/workflows/publish_workflows_worker.yml \
          -f package="$package" \
          -f package_version="$package_version" \
          --ref main
      env:
        GH_TOKEN: ${{ github.token }}
        package_version: ${{ github.ref_name }}
        package: datashare-extract-worker

# Runs of this workflow on the same ref share one concurrency group; a run
# already in progress is never cancelled by a newer one.
concurrency:
  cancel-in-progress: false
  group: ${{ github.workflow }}-${{ github.ref }}
61 changes: 61 additions & 0 deletions .github/workflows/test_extract_worker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# CI for the extract-worker package.
name: Test extract-worker

on:
  # Every push to main.
  push:
    branches:
      - 'main'
  # Pull requests that touch the shared library or the worker itself.
  pull_request:
    paths: ['datashare-python/**', 'workers/extract-worker/**']

# TODO: leverage some caching here
jobs:
lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6

    # `--version` makes the action print ruff's version instead of running
    # its own check; the two steps below invoke ruff with the project config.
    - uses: astral-sh/ruff-action@v3
      with:
        args: '--version'

    - name: Check formatting
      run: >-
        ruff format --config qa/ruff.toml --check workers/extract-worker

    - name: Lint test
      run: >-
        ruff check --config qa/ruff.toml workers/extract-worker

test:
  runs-on: ubuntu-latest
  env:
    # Quoted so YAML keeps the versions as strings: an unquoted version such
    # as 3.10 would load as the float 3.1.
    PYTHON_VERSION: '3.12'
    ASTRAL_VERSION: '0.11.6'
  steps:
    - uses: actions/checkout@v6

    - name: Setup Python project
      uses: actions/setup-python@v6
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # Cache key changes whenever docker-compose.yml changes.
    - name: Cache Docker images
      uses: ScribeMD/docker-cache@0.5.0
      with:
        key: docker-${{ runner.os }}-${{ hashFiles('docker-compose.yml') }}

    # Services are started detached; nothing here waits for them to be ready.
    - name: Start test services
      run: docker compose up -d datashare temporal-post-init elasticsearch

    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        version: ${{ env.ASTRAL_VERSION }}
        python-version: ${{ env.PYTHON_VERSION }}
        enable-cache: true
        working-directory: workers/extract-worker

    - name: Install ffmpeg
      run: |
        sudo apt-get update
        sudo apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev

    - name: Run tests
      working-directory: workers/extract-worker
      run: |
        uv sync --verbose --frozen --extra base --extra cpu --dev
        uv run --frozen python -m pytest --timeout=180 -vvv --cache-clear --show-capture=all -r A

# Runs of this workflow on the same ref share one concurrency group; a newer
# run cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
8 changes: 8 additions & 0 deletions datashare-python/datashare_python/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import shutil
from asyncio import AbstractEventLoop
from collections.abc import AsyncGenerator, Generator, Iterator, Sequence
from pathlib import Path
Expand Down Expand Up @@ -301,3 +302,10 @@ async def all_done(task_client: DatashareTaskClient, not_done: list[str]) -> boo
@pytest.fixture # noqa: F405
def typer_asyncio_patch() -> None:
nest_asyncio.apply()


def clear_dirs(config: WorkerConfig) -> None:
    """Reset the worker's artifacts root and working directory to empty.

    Each directory is deleted with its whole content when it exists, then
    recreated (parents included), so both directories exist and are empty
    when the function returns.

    Args:
        config: worker configuration exposing ``artifacts_root`` and
            ``workdir`` as ``pathlib.Path`` objects.

    Raises:
        OSError: if an existing directory cannot be removed or recreated.
    """
    for directory in (config.artifacts_root, config.workdir):
        # A directory that was never created (e.g. first run) is not an
        # error: only remove what is actually there. Real removal failures
        # are still raised rather than silently ignored.
        if directory.exists():
            shutil.rmtree(str(directory))
        directory.mkdir(parents=True, exist_ok=True)
8 changes: 7 additions & 1 deletion datashare-python/datashare_python/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,17 @@ def to_filesystem(self) -> FilesystemDocument:
)


def _is_absolute_path(v: bytes | BytesIO | Path) -> Any:
if isinstance(v, Path) and not v.is_absolute():
raise ValueError("artifact path must be absolute")
return v


@dataclass(frozen=True)
class DocArtifact:
project: str
doc_id: str
artifact: bytes | BytesIO
artifact: Annotated[bytes | BytesIO | Path, AfterValidator(_is_absolute_path)]
filename: str
metadata_key: str

Expand Down
31 changes: 24 additions & 7 deletions datashare-python/datashare_python/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def build_template_tarball() -> None:
is_hidden = path.name.startswith(".") or any(
"." in p for p in path.parts[:-1]
)
if is_hidden or not path.is_file() or path.suffix not in ALLOWED_EXTS:
skip = is_hidden or not path.is_file() or path.suffix not in ALLOWED_EXTS
if path.name != "Dockerfile" and skip:
continue
tar.add(path, arcname=path.relative_to(template_dir))

Expand Down Expand Up @@ -75,8 +76,11 @@ def _update_pyproject_toml(
pyproject_toml["tool"]["uv"].pop("index", None)

project = pyproject_toml["project"]
project["name"] = package_name.replace("_", "-").lower()
project["version"] = "0.1.0"
project["authors"] = []
project.pop("urls", None)
project.pop("description", None)
project["dependencies"] = sorted(
d
for d in project["dependencies"]
Expand All @@ -100,16 +104,29 @@ def _update_pyproject_toml(
"worker_template", package_name
)
entry_points["datashare.activities"]["activities"] = activities_entry_point

deps = entry_points["datashare.dependencies"]["dependencies"]
deps = deps.replace("worker_template", package_name)
entry_points["datashare.dependencies"]["dependencies"] = deps

cfg_cls_entry_point = entry_points["datashare.worker_config_cls"][
"worker_config_cls"
]
cfg_cls_entry_point = cfg_cls_entry_point.replace("worker_template", package_name)
entry_points["datashare.worker_config_cls"]["worker_config_cls"] = (
cfg_cls_entry_point
)

hatch_sdist = pyproject_toml["tool"]["hatch"]["build"]["targets"]["wheel"]
hatch_sdist["packages"] = [
i if i != "worker_template" else package_name for i in hatch_sdist["packages"]
]

hatch_sdist = pyproject_toml["tool"]["hatch"]["build"]["targets"]["sdist"]
if "only-include" in hatch_sdist:
hatch_sdist["only-include"] = [
i if i != "worker_template" else package_name
for i in hatch_sdist["only-include"]
]
hatch_sdist = pyproject_toml["tool"]["hatch"]["build"]["targets"]["wheel"]
if "packages" in hatch_sdist:
hatch_sdist["packages"] = [package_name]

build_system = pyproject_toml["build-system"]
build_system["package"] = package_name

return pyproject_toml
Loading
Loading