Skip to content

Commit e97d1fa

Browse files
committed
"initial commit"
0 parents  commit e97d1fa

41 files changed

Lines changed: 97203 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/cd.yml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
# CD: deploys the Databricks asset bundle to acc and prd on every push to main.
# On prd, also tags the commit with the version from version.txt and pushes the tag.
name: CD

on:
  workflow_dispatch:
  push:
    branches:
      - 'main'

jobs:
  deploy:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        environment: [acc, prd]
    environment: ${{ matrix.environment }}
    permissions:
      contents: write # to push tag
    env:
      DATABRICKS_HOST: ${{ vars.DATABRICKS_HOST }}
      DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
      DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
    steps:
      - name: Checkout Source Code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 #v4.2.2

      - name: Install Databricks CLI
        uses: databricks/setup-cli@49580195afe1ccb06d195764a1d0ae9fabfe2edd #v0.246.0
        with:
          version: 0.246.0

      - name: Configure Databricks CLI
        # Writes a "marvelous" profile so the bundle can authenticate via
        # OAuth client credentials.
        run: |
          mkdir -p ~/.databricks
          cat > ~/.databrickscfg << EOF
          [marvelous]
          host = ${{ vars.DATABRICKS_HOST }}
          client_id = ${{ secrets.DATABRICKS_CLIENT_ID }}
          client_secret = ${{ secrets.DATABRICKS_CLIENT_SECRET }}
          EOF

      - name: Install uv
        uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 #v5.4.1

      - name: Deploy to Databricks
        env:
          DATABRICKS_BUNDLE_ENV: ${{ matrix.environment }}
        run: |
          databricks bundle deploy \
            --var="git_sha=${{ github.sha }}" \
            --var="branch=${{ github.ref_name }}"
          if [ "${{ matrix.environment }}" = "prd" ]; then
            # BUG FIX: the original ran `echo "VERSION=$(cat version.txt)"`,
            # which only printed the text — $VERSION was never set, so
            # `git tag $VERSION` expanded to bare `git tag` (lists tags) and
            # `git push origin $VERSION` pushed nothing. Assign it instead.
            VERSION=$(cat version.txt)
            git tag "$VERSION"
            git push origin "$VERSION"
          fi

.github/workflows/ci.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
# CI: runs pre-commit checks and the test suite on every pull request to main.
name: CI

on:
  pull_request:
    branches:
      - main

jobs:
  pytest_and_checks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 #v4.2.2
        with:
          # Fetch all history for all branches and tags
          fetch-depth: 0
          ref: ${{ github.head_ref }}

      - name: Git tag from version.txt
        # Creates a local (unpushed) tag matching version.txt so tooling that
        # derives the version from git metadata sees the expected value.
        run: |
          # BUG FIX: the original ran `echo "VERSION=$(cat version.txt)"`,
          # which only printed the text — $VERSION stayed empty, so
          # `git tag $VERSION` expanded to bare `git tag` and created nothing.
          VERSION=$(cat version.txt)
          git tag "$VERSION"

      - name: Install uv
        uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 #v5.4.1

      - name: Install the dependencies
        run: |
          uv sync --extra test

      - name: Run pre-commit checks
        run: |
          uv run pre-commit run --all-files

      - name: run pytest
        run: |
          uv run pytest -m "not ci_exclude"

.gitignore

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
# Virtual environments
.venv/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Tool caches
.databricks/*
.ruff_cache/*

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# mypy
.mypy_cache/
.dmypy.json

# Pyre type checker
.pyre/

# Data, models, outputs
*.h5
*.hdf5
*.tsv
*.parquet
*.pkl
*.joblib
*.npy
*.npz
*.log
*.mlmodel
*.onnx
*.ckpt
*.pt
*.pth
*.pb
*.tflite
*.metrics
*.tensorboard
runs/
logs/
output/
outputs/
checkpoints/

# MLflow
mlruns/

# VSCode
.vscode/

# MacOS
.DS_Store

# Environment variables
.env
logs*

.pre-commit-config.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
# Notebooks and scripts are experimental; keep hooks off them.
exclude: '^(notebooks|scripts)/.*'
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: check-json
      - id: check-toml
      - id: check-yaml
        # --unsafe: syntax-check only, so custom tags (e.g. CI YAML) don't fail.
        args: ["--unsafe"]
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: detect-private-key
      - id: forbid-new-submodules
      - id: check-docstring-first
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.5
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix, --show-fixes]
      - id: ruff-format

README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
<h1 align="center">
Marvelous MLOps Free End-to-end MLOps with Databricks Course
</h1>

## Set up your environment
In this course, we use Databricks serverless [version 3](https://docs.databricks.com/aws/en/release-notes/serverless/environment-version/three).

In our examples, we use UV. Check out the documentation on how to install it: https://docs.astral.sh/uv/getting-started/installation/

To create a new environment and create a lockfile, run:

```
uv sync --extra dev
```

# Data
Using the [**Marvel Characters Dataset**](https://www.kaggle.com/datasets/mohitbansal31s/marvel-characters?resource=download) from Kaggle.

This dataset contains detailed information about Marvel characters (e.g., name, powers, physical attributes, alignment, etc.).
It is used to build classification and feature engineering models for various MLOps tasks, such as predicting character attributes or status.

# Scripts

- `01.process_data.py`: Loads and preprocesses the Marvel dataset, splits into train/test, and saves to the catalog.
- `02.train_register_fe_model.py`: Performs feature engineering and trains the Marvel character model.
- `03.deploy_model.py`: Deploys the trained Marvel model to a Databricks model serving endpoint.
- `04.post_commit_status.py`: Posts status updates for Marvel integration tests to GitHub.
- `05.refresh_monitor.py`: Refreshes monitoring tables and dashboards for Marvel model serving.

0 commit comments

Comments
 (0)