diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml new file mode 100644 index 0000000000..b9609bd9ff --- /dev/null +++ b/.github/workflows/ansible-deploy.yml @@ -0,0 +1,89 @@ +name: Ansible Deployment + +on: + push: + branches: [master, lab05, lab06] + paths: + - 'ansible/**' + - '.github/workflows/ansible-deploy.yml' + pull_request: + branches: [master, lab05, lab06] + paths: + - 'ansible/**' + - '.github/workflows/ansible-deploy.yml' + +jobs: + lint: + name: Ansible Lint + runs-on: ubuntu-latest + defaults: + run: + working-directory: ansible + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and collections + run: | + pip install ansible ansible-lint + ansible-galaxy collection install community.docker community.general + + - name: Run ansible-lint + run: ansible-lint playbooks/*.yml + continue-on-error: true + + deploy: + name: Deploy Application + needs: lint + runs-on: ubuntu-latest + if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/lab06') + defaults: + run: + working-directory: ansible + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and collections + run: | + pip install ansible + ansible-galaxy collection install community.docker community.general + + - name: Setup SSH + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + ssh-keyscan -H ${{ secrets.VM_HOST }} >> ~/.ssh/known_hosts 2>/dev/null || true + + - name: Create CI inventory + run: | + cat > inventory/hosts.ci.ini << EOF + [webservers] + deploy-target ansible_host=${{ secrets.VM_HOST }} ansible_user=${{ secrets.VM_USER }} ansible_python_interpreter=/usr/bin/python3 + EOF + + - name: Deploy with Ansible + 
env: + ANSIBLE_VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT_PASSWORD }} + run: | + trap 'rm -f /tmp/vault_pass' EXIT # always delete the vault password file, even when the playbook fails (the default shell is bash -e) + echo "$ANSIBLE_VAULT_PASSWORD" > /tmp/vault_pass + ansible-playbook playbooks/deploy.yml -i inventory/hosts.ci.ini --vault-password-file /tmp/vault_pass + + - name: Verify deployment + run: | + sleep 10 + curl -sf "http://${{ secrets.VM_HOST }}:5000/health" || exit 1 diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml new file mode 100644 index 0000000000..b74f06240b --- /dev/null +++ b/.github/workflows/go-ci.yml @@ -0,0 +1,82 @@ +name: Go CI + +on: + push: + branches: [master, lab02, lab03] + paths: + - 'app_go/**' + - '.github/workflows/go-ci.yml' + pull_request: + branches: [master, lab02, lab03] + paths: + - 'app_go/**' + - '.github/workflows/go-ci.yml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + DOCKER_IMAGE: jambulancia/devops-info-service-go + GO_VERSION: '1.24' + +jobs: + test: + name: Lint & Test + runs-on: ubuntu-latest + defaults: + run: + working-directory: app_go + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: app_go/go.mod + + - name: Run golangci-lint + uses: golangci/golangci-lint-action@v6 + with: + version: latest + working-directory: app_go + + - name: Run tests + run: go test -v ./... 
+ + docker: + name: Build & Push Docker + runs-on: ubuntu-latest + needs: test + if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/lab02' || github.ref == 'refs/heads/lab03') + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Generate version + id: meta + run: echo "version=$(date +%Y.%m.%d)" >> $GITHUB_OUTPUT + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./app_go + push: true + tags: | + ${{ env.DOCKER_IMAGE }}:${{ steps.meta.outputs.version }} + ${{ env.DOCKER_IMAGE }}:latest + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000000..5c8f2df8d2 --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,95 @@ +name: Python CI + +on: + push: + branches: [master, lab02, lab03] + paths: + - 'app_python/**' + - '.github/workflows/python-ci.yml' + pull_request: + branches: [master, lab02, lab03] + paths: + - 'app_python/**' + - '.github/workflows/python-ci.yml' + +# Cancel outdated runs +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + DOCKER_IMAGE: jambulancia/devops-info-service + PYTHON_VERSION: '3.13' + +jobs: + test: + name: Lint & Test + runs-on: ubuntu-latest + defaults: + run: + working-directory: app_python + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + cache-dependency-path: app_python/requirements*.txt + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r 
requirements-dev.txt + + - name: Run ruff linter + run: ruff check app.py tests/ + + - name: Run tests + run: pytest tests/ -v + + - name: Snyk security scan + uses: snyk/actions/python@master + continue-on-error: true + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + command: test + args: --severity-threshold=high + + docker: + name: Build & Push Docker + runs-on: ubuntu-latest + needs: test + if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/lab02' || github.ref == 'refs/heads/lab03') + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Generate version + id: meta + run: echo "version=$(date +%Y.%m.%d)" >> $GITHUB_OUTPUT + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./app_python + push: true + tags: | + ${{ env.DOCKER_IMAGE }}:${{ steps.meta.outputs.version }} + ${{ env.DOCKER_IMAGE }}:latest + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/terraform-ci.yml b/.github/workflows/terraform-ci.yml new file mode 100644 index 0000000000..965e2e3b0e --- /dev/null +++ b/.github/workflows/terraform-ci.yml @@ -0,0 +1,57 @@ +name: Terraform CI + +on: + push: + branches: [master, lab04] + paths: + - 'terraform/**' + - '.github/workflows/terraform-ci.yml' + pull_request: + branches: [master, lab04] + paths: + - 'terraform/**' + - '.github/workflows/terraform-ci.yml' + +jobs: + validate: + name: Validate Terraform + runs-on: ubuntu-latest + defaults: + run: + working-directory: terraform + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Create dummy SSH public key for validation + run: | + mkdir -p ~/.ssh + echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDummyKeyForCIValidationOnly" > 
~/.ssh/id_rsa.pub + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.9.0" + terraform_wrapper: false + + - name: Terraform Format Check + run: terraform fmt -check -recursive -diff + + - name: Terraform Init + run: terraform init -backend=false + + - name: Terraform Validate + run: terraform validate + + - name: Setup TFLint + uses: terraform-linters/setup-tflint@v4 + with: + tflint_version: latest + + - name: TFLint Init + run: tflint --init + + - name: TFLint + run: tflint --format compact + continue-on-error: true diff --git a/.gitignore b/.gitignore index 30d74d2584..481c83e2c3 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,29 @@ -test \ No newline at end of file +test + +# Terraform +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars + +# Pulumi +Pulumi.*.yaml +pulumi/venv/ +pulumi/.venv/ + +# Credentials & secrets +.env +*.pem +*.key +credentials + +# Ansible +*.retry +.vault_pass +ansible/inventory/*.pyc +__pycache__/ + +# Lab 12 — local visit counter bind mount (monitoring/docker-compose) +monitoring/data/ diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 0000000000..73b175033e --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,34 @@ +# Ansible — Configuration Management + +[![Ansible Deployment](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/ansible-deploy.yml/badge.svg)](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/ansible-deploy.yml) + +Role-based Ansible automation for Lab 5–6: system provisioning (common, docker) and application deployment (web_app) via Docker Compose. 
+ +## Quick start + +```bash +# Edit inventory with your VM +vim inventory/hosts.ini + +# Create vault (one-time) +ansible-vault create group_vars/all.yml # use structure from group_vars/all.yml.example + +# Provision +ansible-playbook playbooks/provision.yml + +# Deploy app +ansible-playbook playbooks/deploy.yml --ask-vault-pass +``` + +## Tags (Lab 6) + +```bash +ansible-playbook playbooks/provision.yml --tags "docker" +ansible-playbook playbooks/provision.yml --skip-tags "common" +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe # wipe only +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" # clean reinstall +``` + +## CI/CD + +Secrets required: `ANSIBLE_VAULT_PASSWORD`, `SSH_PRIVATE_KEY`, `VM_HOST`, `VM_USER`. See `.github/workflows/ansible-deploy.yml`. diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000000..8448fb5796 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,12 @@ + [defaults] + inventory = inventory/hosts.ini + roles_path = roles + host_key_checking = False + remote_user = ubuntu + retry_files_enabled = False + + [privilege_escalation] + become = True + become_method = sudo + become_user = root + diff --git a/ansible/docs/LAB05.md b/ansible/docs/LAB05.md new file mode 100644 index 0000000000..ee73de9772 --- /dev/null +++ b/ansible/docs/LAB05.md @@ -0,0 +1,69 @@ + # LAB05 — Ansible Fundamentals + + ## 1. Architecture Overview + + - **Ansible version:** _(fill after running `ansible --version`)_ + - **Target VM:** Ubuntu 22.04/24.04 LTS from Lab 4 + - **Structure:** + - `ansible/ansible.cfg` + - `ansible/inventory/hosts.ini` + - `ansible/roles/common`, `docker`, `web_app` (renamed from app_deploy in Lab 6) + - `ansible/playbooks/provision.yml`, `deploy.yml`, `site.yml` + - `ansible/group_vars/all.yml` (vaulted; example in `all.yml.example`) + + Roles are used instead of monolithic playbooks for reusability, clarity, and easier testing. + + ## 2. 
Roles Documentation + + ### common + - **Purpose:** Base system provisioning (apt cache, common packages, timezone). + - **Variables:** `common_packages`, `common_timezone`. + - **Handlers:** none. + - **Dependencies:** none. + + ### docker + - **Purpose:** Install and configure Docker Engine and dependencies. + - **Variables:** `docker_packages`, `docker_user`. + - **Handlers:** `restart docker`. + - **Dependencies:** expects `docker_user` to exist (e.g. created outside or by another role). + + ### web_app (formerly app_deploy) + - **Purpose:** Deploy app via Docker Compose; log in to Docker Hub, template compose file, run containers, verify health. + - **Variables:** `app_name`, `app_port`, `app_container_name`, `app_restart_policy`, `app_environment`, plus vaulted `dockerhub_username`, `dockerhub_password`, `docker_image`, `docker_image_tag`. + - **Handlers:** `restart app container`. + - **Dependencies:** Docker installed and running (via `docker` role). + + ## 3. Idempotency Demonstration + + Paste and briefly annotate your outputs: + + - **First run of `playbooks/provision.yml`:** _(expect many `changed`)_ + - **Second run of `playbooks/provision.yml`:** _(expect all `ok`, no `changed`)_ + + Explain which tasks changed on the first run and why nothing changed on the second run (desired state already reached). + + ## 4. Ansible Vault Usage + + - Sensitive values (Docker Hub credentials, image name) are stored in `group_vars/all.yml`, which **you create with `ansible-vault`** using the structure from `group_vars/all.yml.example`. + - Use either `--ask-vault-pass` or a `.vault_pass` file (added to `.gitignore`) for automation. + - Vault ensures credentials are encrypted at rest in Git. + + Show: + - Example of encrypted `group_vars/all.yml` (header only; content unreadable). + - How you manage the vault password. + + ## 5. 
Deployment Verification + + After running: + + ```bash + ansible-playbook playbooks/deploy.yml --ask-vault-pass + ``` + + Capture: + - `docker ps` on the VM showing the container running. + - `curl http://<VM_IP>:5000/health` and `/` outputs. + - Any handler executions (e.g., app restart). + + + diff --git a/ansible/docs/LAB06.md b/ansible/docs/LAB06.md new file mode 100644 index 0000000000..a20a5ee943 --- /dev/null +++ b/ansible/docs/LAB06.md @@ -0,0 +1,114 @@ +# Lab 6: Advanced Ansible & CI/CD + +## 1. Overview + +- **Technologies:** Ansible 2.16+, Docker Compose v2, GitHub Actions, Jinja2 +- **Changes from Lab 5:** + - Roles refactored with blocks, rescue, always, and tags + - `app_deploy` renamed to `web_app` + - Deployment switched from `docker run` to Docker Compose + - Wipe logic with variable + tag safety + - Ansible CI/CD workflow (lint + deploy) +- **Structure:** Same `ansible/` layout; `web_app` uses `templates/docker-compose.yml.j2` and `tasks/wipe.yml` + +--- + +## 2. Blocks & Tags + +### common role +- **Block:** Package installation (apt cache, common packages) with tag `packages` +- **Rescue:** Retry `apt update` and package install on failure +- **Always:** Log completion to `/tmp/ansible_common_complete` +- **Tags:** `packages`, `common` + +### docker role +- **Block 1 (docker_install):** Prerequisites, GPG key, repo, Docker packages +- **Rescue:** Wait 10s, retry apt update and Docker install +- **Always:** Ensure Docker service is enabled and started +- **Block 2 (docker_config):** Add user to docker group, install python3-docker +- **Tags:** `docker_install`, `docker_config`, `docker` + +### web_app role +- **Tags:** `app_deploy`, `compose`, `web_app_wipe` (wipe tasks only) + +### Example commands +```bash +ansible-playbook playbooks/provision.yml --tags "docker" +ansible-playbook playbooks/provision.yml --skip-tags "common" +ansible-playbook playbooks/provision.yml --tags "packages" +ansible-playbook playbooks/provision.yml --tags "docker_install" 
+ansible-playbook playbooks/provision.yml --list-tags +``` + +--- + +## 3. Docker Compose Migration + +- **Template:** `roles/web_app/templates/docker-compose.yml.j2` + - Uses Jinja2 for `app_name`, `docker_image`, `docker_tag`, `app_port`, `app_internal_port`, `app_environment` + - Restart policy: `unless-stopped` +- **Role dependency:** `roles/web_app/meta/main.yml` declares dependency on `docker` +- **Tasks:** Create app dir, template compose file, `docker_login`, `docker_compose_v2` (state: present, pull: always) +- **App dir:** `/opt/{{ app_name }}` (e.g. `/opt/devops-info-service`) + +--- + +## 4. Wipe Logic + +- **Variable:** `web_app_wipe` (default: `false`) +- **Tag:** `web_app_wipe` +- **Location:** `roles/web_app/tasks/wipe.yml`, included at top of `main.yml` +- **Behavior:** Wipe runs only when `web_app_wipe | bool` is true and tasks with tag `web_app_wipe` are executed +- **Tasks:** `docker compose down`, remove compose file, remove app directory, debug log + +### Test scenarios +| Scenario | Command | Result | +|----------|---------|--------| +| Normal deploy | `ansible-playbook deploy.yml` | Deploy only; wipe skipped | +| Wipe only | `ansible-playbook deploy.yml -e "web_app_wipe=true" --tags web_app_wipe` | Wipe only; deploy skipped | +| Clean reinstall | `ansible-playbook deploy.yml -e "web_app_wipe=true"` | Wipe then deploy | +| Tag only, var false | `ansible-playbook deploy.yml --tags web_app_wipe` | Wipe skipped (when blocks it) | + +--- + +## 5. CI/CD Integration + +- **Workflow:** `.github/workflows/ansible-deploy.yml` +- **Triggers:** Push/PR to `ansible/**` on master, lab05, lab06 +- **Jobs:** + 1. **lint:** ansible-lint on playbooks (continue-on-error: true) + 2. 
**deploy:** Runs only on push to master/lab06; requires secrets +- **Secrets:** `ANSIBLE_VAULT_PASSWORD`, `SSH_PRIVATE_KEY`, `VM_HOST`, `VM_USER` +- **Deploy steps:** Install Ansible, setup SSH, create CI inventory, run `deploy.yml` with vault, verify `/health` +- **Verification:** `curl http://VM_HOST:5000/health` after deploy + +### Badge +```markdown +[![Ansible Deployment](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/ansible-deploy.yml/badge.svg)](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/ansible-deploy.yml) +``` + +--- + +## 6. Testing + +- **Idempotency:** Run `deploy.yml` twice; second run should show mostly `ok`, no changes. +- **Selective tags:** Use `--tags` and `--skip-tags` as in section 2. +- **Wipe tests:** Run all four scenarios in section 4 and verify. +- **CI:** Push changes to `ansible/`, confirm workflow runs and lint passes; deploy passes when secrets are set. + +--- + +## 7. Challenges + +- _(Add any issues and how you resolved them)_ +- **Note:** `community.docker.docker_compose_v2` has no `state: restarted`; handler uses `docker compose restart` via `command` module. + +--- + +## 8. Research Answers + +1. **Variable + tag:** Variable ensures wipe is explicit; tag limits wipe to runs where wipe is intended. Prevents accidental wipe. +2. **`never` vs this approach:** `never` runs only when explicitly requested; our approach also requires the variable. +3. **Wipe before deploy:** Enables wipe → deploy in one run (clean reinstall). +4. **Clean reinstall vs rolling update:** Clean reinstall for major changes or corruption; rolling update for low-risk updates. +5. **Extending wipe:** Add tasks to remove images (`docker image prune`) and volumes (`docker volume rm`) after `compose down`. 
diff --git a/ansible/group_vars/all.yml.example b/ansible/group_vars/all.yml.example new file mode 100644 index 0000000000..04efebb10a --- /dev/null +++ b/ansible/group_vars/all.yml.example @@ -0,0 +1,15 @@ +--- +# Docker Hub credentials (use ansible-vault for real file) +dockerhub_username: your-username +dockerhub_password: your-access-token + +# Application configuration +app_name: devops-info-service +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: latest +docker_tag: "{{ docker_image_tag }}" +app_port: 5000 +app_internal_port: 5000 +app_container_name: "{{ app_name }}" +compose_project_dir: "/opt/{{ app_name }}" + diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..75fa0f5aef --- /dev/null +++ b/ansible/inventory/hosts.ini @@ -0,0 +1,3 @@ + [webservers] + vm-name ansible_host=<VM_IP>
ansible_user=ubuntu ansible_python_interpreter=/usr/bin/python3 + diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..d91d1dab28 --- /dev/null +++ b/ansible/playbooks/deploy.yml @@ -0,0 +1,9 @@ +--- +- name: Deploy application + hosts: webservers + become: true + + roles: + - role: web_app + tags: [web_app, app_deploy, compose] + diff --git a/ansible/playbooks/provision.yml b/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..a2e6f74cd5 --- /dev/null +++ b/ansible/playbooks/provision.yml @@ -0,0 +1,11 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - role: common + tags: [common, packages] + - role: docker + tags: [docker, docker_install, docker_config] + diff --git a/ansible/playbooks/site.yml b/ansible/playbooks/site.yml new file mode 100644 index 0000000000..5a3c203ebd --- /dev/null +++ b/ansible/playbooks/site.yml @@ -0,0 +1,10 @@ + --- + - name: Full provisioning and deployment + hosts: webservers + become: true + + roles: + - common + - docker + - web_app + diff --git a/ansible/roles/common/defaults/main.yml b/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..2027a15cc5 --- /dev/null +++ b/ansible/roles/common/defaults/main.yml @@ -0,0 +1,15 @@ + --- + # Default packages for common system setup + common_packages: + - python3-pip + - curl + - git + - vim + - htop + - ca-certificates + - apt-transport-https + + common_timezone: Etc/UTC + # User to ensure exists for deployments (optional) + common_deploy_user: null + diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..0d1e801fa0 --- /dev/null +++ b/ansible/roles/common/tasks/main.yml @@ -0,0 +1,60 @@ +--- +- name: Install common packages + block: + - name: Update apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + + - name: Install common packages + 
ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + install_recommends: false + + rescue: + - name: Retry apt update on failure + ansible.builtin.apt: + update_cache: true + cache_valid_time: 0 + + - name: Retry package installation + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + install_recommends: false + + always: + - name: Log common role completion + ansible.builtin.copy: + content: "common role completed at {{ ansible_date_time.iso8601 }}\n" + dest: /tmp/ansible_common_complete + mode: "0644" + + tags: + - packages + - common + +- name: Ensure deploy user exists + block: + - name: Create deploy user + ansible.builtin.user: + name: "{{ common_deploy_user }}" + state: present + shell: /bin/bash + create_home: true + + when: common_deploy_user | default('', true) | length > 0 # default('', true) maps both undefined and the role default null to '', so length never sees None + tags: + - users + - common + +- name: Set system timezone + block: + - name: Configure timezone + community.general.timezone: + name: "{{ common_timezone }}" + + when: common_timezone is defined + tags: + - common diff --git a/ansible/roles/docker/defaults/main.yml b/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..c7f41e60fc --- /dev/null +++ b/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,8 @@ + --- + docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + + docker_user: ubuntu + diff --git a/ansible/roles/docker/handlers/main.yml b/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..0fbc85062b --- /dev/null +++ b/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,6 @@ + --- + - name: restart docker + ansible.builtin.service: + name: docker + state: restarted + diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..bf5f2148a6 --- /dev/null +++ b/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,86 @@ +--- +- name: Install Docker + block: + - name: Install prerequisite packages + 
ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + update_cache: true + + - name: Add Docker GPG key + ansible.builtin.apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + + - name: Add Docker APT repository + ansible.builtin.apt_repository: + repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + state: present + notify: restart docker + + - name: Install Docker packages + ansible.builtin.apt: + name: "{{ docker_packages }}" + state: present + update_cache: true + notify: restart docker + + rescue: + - name: Wait before retry + ansible.builtin.pause: + seconds: 10 + prompt: "Retrying after GPG/repo failure..." + + - name: Retry apt update + ansible.builtin.apt: + update_cache: true + cache_valid_time: 0 + + - name: Retry Docker GPG key + ansible.builtin.apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + + - name: Retry Docker repository + ansible.builtin.apt_repository: + repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + state: present + notify: restart docker + + - name: Retry Docker package install + ansible.builtin.apt: + name: "{{ docker_packages }}" + state: present + update_cache: true + notify: restart docker + + always: + - name: Ensure Docker service is enabled and started + ansible.builtin.service: + name: docker + state: started + enabled: true + + tags: + - docker_install + - docker + +- name: Configure Docker + block: + - name: Add user to docker group + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true + + - name: Install python3-docker for Ansible Docker modules + ansible.builtin.apt: + name: python3-docker + state: present + + tags: + - docker_config + - docker diff --git a/ansible/roles/web_app/defaults/main.yml b/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 0000000000..b390d49f0f --- 
/dev/null +++ b/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,18 @@ +--- +app_name: devops-info-service +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_tag: "{{ docker_image_tag | default('latest') }}" +app_port: 5000 +app_internal_port: 5000 +app_container_name: "{{ app_name }}" +app_restart_policy: unless-stopped +app_environment: {} + +# Compose config +compose_project_dir: "/opt/{{ app_name }}" +docker_compose_version: "3.8" + +# Wipe logic: set to true to remove application completely +# Wipe only: ansible-playbook deploy.yml -e "web_app_wipe=true" --tags web_app_wipe +# Clean install: ansible-playbook deploy.yml -e "web_app_wipe=true" +web_app_wipe: false diff --git a/ansible/roles/web_app/handlers/main.yml b/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..c325f998c9 --- /dev/null +++ b/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart app container + ansible.builtin.command: + cmd: docker compose restart + chdir: "{{ compose_project_dir }}" diff --git a/ansible/roles/web_app/meta/main.yml b/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..4990033a1e --- /dev/null +++ b/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,4 @@ +--- +# Ensure Docker is installed before deploying the web app +dependencies: + - role: docker diff --git a/ansible/roles/web_app/tasks/main.yml b/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..de0d9400fe --- /dev/null +++ b/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,51 @@ +--- +# Wipe logic runs first when explicitly requested (-e web_app_wipe=true) +- name: Include wipe tasks + ansible.builtin.include_tasks: wipe.yml + tags: + - web_app_wipe + +# Deployment block: skip when wipe-only (tag web_app_wipe alone) +- name: Deploy application with Docker Compose + block: + - name: Create app directory + ansible.builtin.file: + path: "{{ compose_project_dir }}" + state: directory + mode: "0755" + + - name: 
Template docker-compose file + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ compose_project_dir }}/docker-compose.yml" + mode: "0644" + notify: restart app container + + - name: Log in to Docker Hub + community.docker.docker_login: + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + no_log: true + + - name: Deploy with Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ compose_project_dir }}" + state: present + pull: always + + - name: Wait for application port on target + ansible.builtin.wait_for: + host: 127.0.0.1 + port: "{{ app_port }}" + delay: 5 + timeout: 60 + + - name: Check health endpoint + ansible.builtin.uri: + url: "http://127.0.0.1:{{ app_port }}/health" + method: GET + status_code: 200 + + tags: + - app_deploy + - compose diff --git a/ansible/roles/web_app/tasks/wipe.yml b/ansible/roles/web_app/tasks/wipe.yml new file mode 100644 index 0000000000..c3341d2372 --- /dev/null +++ b/ansible/roles/web_app/tasks/wipe.yml @@ -0,0 +1,29 @@ +--- +# Wipe logic: controlled by web_app_wipe variable + web_app_wipe tag +# Only runs when both -e "web_app_wipe=true" AND --tags web_app_wipe (or full deploy with wipe) +- name: Wipe web application + block: + - name: Stop and remove containers + community.docker.docker_compose_v2: + project_src: "{{ compose_project_dir }}" + state: absent + ignore_errors: true + + - name: Remove docker-compose file + ansible.builtin.file: + path: "{{ compose_project_dir }}/docker-compose.yml" + state: absent + ignore_errors: true + + - name: Remove application directory + ansible.builtin.file: + path: "{{ compose_project_dir }}" + state: absent + + - name: Log wipe completion + ansible.builtin.debug: + msg: "Application {{ app_name }} wiped successfully" + + when: web_app_wipe | default(false) | bool + tags: + - web_app_wipe diff --git a/ansible/roles/web_app/templates/docker-compose.yml.j2 b/ansible/roles/web_app/templates/docker-compose.yml.j2 new file mode 100644 
index 0000000000..852da84be4 --- /dev/null +++ b/ansible/roles/web_app/templates/docker-compose.yml.j2 @@ -0,0 +1,15 @@ +version: '{{ docker_compose_version }}' + +services: + {{ app_name }}: + image: {{ docker_image }}:{{ docker_tag }} + container_name: {{ app_container_name }} + ports: + - "{{ app_port }}:{{ app_internal_port }}" + environment: + PORT: "{{ app_internal_port }}" + HOST: "0.0.0.0" + {% for key, value in app_environment.items() %} + {{ key }}: "{{ value }}" + {% endfor %} + restart: {{ app_restart_policy }} diff --git a/app_go/.dockerignore b/app_go/.dockerignore new file mode 100644 index 0000000000..6ff436b9f9 --- /dev/null +++ b/app_go/.dockerignore @@ -0,0 +1,21 @@ +# Version control +.git/ +.gitignore + +# IDE / editor +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +docs/ +*.md +README* + +*.exe +*.test +*.out diff --git a/app_go/.gitignore b/app_go/.gitignore new file mode 100644 index 0000000000..94b0741a64 --- /dev/null +++ b/app_go/.gitignore @@ -0,0 +1,7 @@ +# Binaries +devops-info-service +devops-info-service-small +*.exe + +# OS +.DS_Store diff --git a/app_go/Dockerfile b/app_go/Dockerfile new file mode 100644 index 0000000000..d5719890e8 --- /dev/null +++ b/app_go/Dockerfile @@ -0,0 +1,32 @@ +# DevOps Info Service — Go (Lab 2 Bonus) +# Multi-stage build: separate build environment from runtime + +FROM golang:1.24-alpine AS builder + +WORKDIR /build + +COPY go.mod . +RUN go mod download + +COPY main.go . + +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o devops-info-service . + +FROM alpine:3.21 + +RUN addgroup -g 1000 appgroup && \ + adduser -D -u 1000 -G appgroup appuser + +WORKDIR /app + +COPY --from=builder /build/devops-info-service . 
+ +RUN chown appuser:appgroup devops-info-service + +USER appuser + +EXPOSE 5000 + +ENV PORT=5000 HOST=0.0.0.0 + +CMD ["./devops-info-service"] diff --git a/app_go/README.md b/app_go/README.md new file mode 100644 index 0000000000..646594edca --- /dev/null +++ b/app_go/README.md @@ -0,0 +1,82 @@ +# DevOps Info Service (Go) + +[![Go CI](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/go-ci.yml/badge.svg)](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/go-ci.yml) + +Go implementation of the DevOps Info Service — same endpoints and JSON structure as the Python version. Used for Lab 1 bonus and as a basis for multi-stage Docker builds in Lab 2. + +## Prerequisites + +- **Go 1.21+** (1.24 used during development) + +## Build + +```bash +cd app_go + +# Standard build +go build -o devops-info-service . + +# Smaller binary (strip debug info) — recommended for Docker +go build -ldflags="-s -w" -o devops-info-service . +``` + +## Run + +```bash +./devops-info-service +``` + +Defaults: `HOST=0.0.0.0`, `PORT=5000`. + +Custom config: + +```bash +PORT=8080 ./devops-info-service +HOST=127.0.0.1 PORT=3000 ./devops-info-service +``` + +## API Endpoints + +- **`GET /`** — Service, system, runtime, and request info + endpoints list. +- **`GET /health`** — Health check (`status`, `timestamp`, `uptime_seconds`). + +## Configuration + +| Variable | Default | Description | +|----------|-----------|--------------------| +| `HOST` | `0.0.0.0` | Bind address | +| `PORT` | `5000` | Listen port | + +## Binary size vs Python + +| Build | Size | +|-------|--------| +| `go build` (default) | ~8.1 MB | +| `go build -ldflags="-s -w"` | ~5.5 MB | + +Python runs via interpreter + virtualenv; there is no single executable. The Go binary is self-contained and suitable for minimal Docker images (e.g. `scratch` or `alpine`). + +## Test + +```bash +cd app_go +go test -v ./... 
+``` + +## Docker + +Multi-stage build (Lab 2 bonus): + +```bash +docker build -t devops-info-service-go . +docker run -p 5000:5000 devops-info-service-go +``` + +See `docs/LAB02.md` for the multi-stage strategy and size analysis. + +## Test + +```bash +curl -s http://localhost:5000/ | jq +curl -s http://localhost:5000/health | jq +``` diff --git a/app_go/docs/GO.md b/app_go/docs/GO.md new file mode 100644 index 0000000000..11e7287a1e --- /dev/null +++ b/app_go/docs/GO.md @@ -0,0 +1,21 @@ +# Go — Language Justification + +## Why Go for this service + +- **Small, static binaries** — Single executable, no runtime or interpreter. Ideal for Docker multi-stage builds (Lab 2) and minimal images (`scratch` / `alpine`). +- **Fast compilation** — `go build` completes in seconds. Good for CI/CD. +- **Standard library** — `net/http`, `encoding/json`, `os`, `runtime` cover everything we need. No external dependencies. +- **Simple concurrency** — Goroutines and channels are available if we add more workloads later; for this lab, a single handler is enough. +- **Tooling** — `go build`, `go test`, `go mod` are built-in and straightforward. + +## Compared to alternatives + +| Criterion | Go | Rust | Java/Spring Boot | C# / ASP.NET Core | +|------------------|----------|-------------|------------------|-------------------| +| Binary size | ~5–8 MB | Similar | Large (JVM) | Large (.NET) | +| Build speed | Very fast| Slower | Moderate | Moderate | +| Learning curve | Low | Steep | Moderate | Moderate | +| Stdlib HTTP | Yes | Via crates | Via framework | Via framework | +| Lab 2 Docker fit | Excellent| Good | Heavier | Heavier | + +Go is a good fit for a small HTTP service that will be containerized and used in CI/CD and Kubernetes later in the course. 
diff --git a/app_go/docs/LAB01.md b/app_go/docs/LAB01.md new file mode 100644 index 0000000000..1698e252a8 --- /dev/null +++ b/app_go/docs/LAB01.md @@ -0,0 +1,52 @@ +# Lab 1 Bonus — Go Implementation + +## Overview + +Same DevOps Info Service as the Python app: `GET /` (service + system + runtime + request + endpoints) and `GET /health` (health check). Implemented in Go using only the standard library. + +## Implementation details + +- **`main.go`** — Single binary. Handlers: `mainHandler` for `/`, `healthHandler` for `/health`. `/health` is an exact pattern and `/` is the catch-all; `ServeMux` always prefers the most specific pattern, so registration order does not matter. +- **System info** — `os.Hostname()`, `runtime.GOOS` / `GOARCH` / `NumCPU()` / `Version()`. `platform_version` comes from `/etc/os-release` (`PRETTY_NAME`) on Linux; otherwise `runtime.GOOS`. +- **Uptime** — `startTime` stored at startup; duration computed on each request. Same human-readable format as Python (`"X hours, Y minutes"`). +- **Request info** — Client IP from `RemoteAddr` (or `X-Forwarded-For`), `User-Agent`, method, path. +- **Config** — `HOST` and `PORT` via env; defaults `0.0.0.0` and `5000`. + +## JSON structure + +Matches the Python shape. Differences: + +- **`system.go_version`** — Go version (e.g. `1.24.2`) instead of `python_version`. +- **`service.framework`** — `"net/http"` (stdlib) instead of `"Flask"`. + +## Build and run + +```bash +cd app_go +go build -ldflags="-s -w" -o devops-info-service . +./devops-info-service +``` + +```bash +curl -s http://localhost:5000/ | jq +curl -s http://localhost:5000/health | jq +``` + +## Binary size vs Python + +| Build | Size (approx.) | +|-------|----------------| +| `go build` | ~8.1 MB | +| `go build -ldflags="-s -w"` | ~5.5 MB | + +Python runs via interpreter + venv; there is no single executable. The Go binary is self-contained and suitable for minimal container images. + +## Screenshots + +Place in `app_go/docs/screenshots/`: + +- Main endpoint (`GET /`) — full JSON. +- Health check (`GET /health`) — JSON response. 
+- Formatted output (e.g. `curl … | jq`) or browser. + +Build and run the binary, then capture these to complete the bonus submission. diff --git a/app_go/docs/LAB02.md b/app_go/docs/LAB02.md new file mode 100644 index 0000000000..8ad23ea145 --- /dev/null +++ b/app_go/docs/LAB02.md @@ -0,0 +1,109 @@ +## LAB02 — Docker Containerization (Go, Bonus) + +### Multi-Stage Build Strategy + +The Go app is containerized using a **multi-stage Dockerfile** with two stages: + +1. **Builder stage** (`golang:1.24-alpine`): Compiles the application. +2. **Runtime stage** (`alpine:3.21`): Runs only the compiled binary. + +**Why multi-stage?** The builder image includes the Go compiler, SDK, and build tools (~300 MB). The runtime image needs none of that—just the static binary and a minimal OS. Copying only the binary into Alpine yields an image under 20 MB. + +--- + +### Stage Breakdown + +#### Stage 1: Builder + +```dockerfile +FROM golang:1.24-alpine AS builder +WORKDIR /build +COPY go.mod . +RUN go mod download +COPY main.go . +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o devops-info-service . +``` + +| Step | Purpose | +|------|---------| +| `go.mod` first | Dependencies are cached when only `main.go` changes | +| `CGO_ENABLED=0` | Produces a static binary with no C dependencies; works on any Linux base | +| `-ldflags="-s -w"` | Strips debug info to reduce binary size | +| `-o devops-info-service` | Single output binary to copy into runtime | + +#### Stage 2: Runtime + +```dockerfile +FROM alpine:3.21 +RUN addgroup -g 1000 appgroup && adduser -D -u 1000 -G appgroup appuser +WORKDIR /app +COPY --from=builder /build/devops-info-service . 
+RUN chown appuser:appgroup devops-info-service +USER appuser +EXPOSE 5000 +ENV PORT=5000 HOST=0.0.0.0 +CMD ["./devops-info-service"] +``` + +| Step | Purpose | +|------|---------| +| `COPY --from=builder` | Copy only the binary from the builder stage | +| Non-root user | Run as `appuser` for security | +| Alpine 3.21 | Small base (~5 MB); static binary needs no C runtime | + +--- + +### Size Comparison + +*Replace with your actual output from `docker images`.* + +| Image | Size | +|-------|------| +| `golang:1.24-alpine` (builder) | ~300 MB | +| `devops-info-service` (final) | ~15–20 MB | + +**Size reduction:** ~95% smaller than using the builder image as the final image. + +--- + +### Why Multi-Stage Matters for Compiled Languages + +- **Builder image:** Includes compiler, linker, headers, and libraries. Necessary only for building. +- **Runtime:** Only needs the compiled binary and minimal runtime (Alpine). +- **Security:** Fewer packages and tools reduce attack surface. +- **Deploy speed:** Smaller images pull and start faster. + +--- + +### Build & Run + +**Build:** + +```bash +cd app_go +docker build -t devops-info-service-go . +``` + +*Add your actual build output here.* + +**Run:** + +```bash +docker run -d -p 5000:5000 --name devops-go devops-info-service-go +``` + +**Test:** + +```bash +curl http://localhost:5000/ +curl http://localhost:5000/health +``` + +--- + +### Security Benefits + +- **Non-root user:** Limits damage if the app is compromised. +- **Minimal base:** Alpine has fewer packages than full distros. +- **Static binary:** No runtime dependency installation; fewer paths for supply-chain issues. +- **Smaller image:** Less code to audit and fewer CVE-prone components. 
diff --git a/app_go/docs/screenshots/Screenshot From 2026-01-28 22-23-49.png b/app_go/docs/screenshots/Screenshot From 2026-01-28 22-23-49.png new file mode 100644 index 0000000000..f696d3cf53 Binary files /dev/null and b/app_go/docs/screenshots/Screenshot From 2026-01-28 22-23-49.png differ diff --git a/app_go/docs/screenshots/Screenshot From 2026-01-28 22-23-59.png b/app_go/docs/screenshots/Screenshot From 2026-01-28 22-23-59.png new file mode 100644 index 0000000000..3591e381c0 Binary files /dev/null and b/app_go/docs/screenshots/Screenshot From 2026-01-28 22-23-59.png differ diff --git a/app_go/docs/screenshots/Screenshot From 2026-01-28 22-24-55.png b/app_go/docs/screenshots/Screenshot From 2026-01-28 22-24-55.png new file mode 100644 index 0000000000..545469177d Binary files /dev/null and b/app_go/docs/screenshots/Screenshot From 2026-01-28 22-24-55.png differ diff --git a/app_go/docs/screenshots/Screenshot From 2026-02-04 16-44-48.png b/app_go/docs/screenshots/Screenshot From 2026-02-04 16-44-48.png new file mode 100644 index 0000000000..0e50777303 Binary files /dev/null and b/app_go/docs/screenshots/Screenshot From 2026-02-04 16-44-48.png differ diff --git a/app_go/go.mod b/app_go/go.mod new file mode 100644 index 0000000000..85ee40ae7d --- /dev/null +++ b/app_go/go.mod @@ -0,0 +1,3 @@ +module devops-info-service + +go 1.24.2 diff --git a/app_go/main.go b/app_go/main.go new file mode 100644 index 0000000000..2fa886a0b5 --- /dev/null +++ b/app_go/main.go @@ -0,0 +1,197 @@ +// DevOps Info Service — Go implementation. +// Same endpoints and JSON structure as the Python version. 
+ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "log" + "net" + "net/http" + "os" + "runtime" + "strings" + "time" +) + +type ServiceInfo struct { + Service Service `json:"service"` + System System `json:"system"` + Runtime Runtime `json:"runtime"` + Request Request `json:"request"` + Endpoints []Endpoint `json:"endpoints"` +} + +type Service struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Framework string `json:"framework"` +} + +type System struct { + Hostname string `json:"hostname"` + Platform string `json:"platform"` + PlatformVersion string `json:"platform_version"` + Architecture string `json:"architecture"` + CPUCount int `json:"cpu_count"` + GoVersion string `json:"go_version"` +} + +type Runtime struct { + UptimeSeconds int `json:"uptime_seconds"` + UptimeHuman string `json:"uptime_human"` + CurrentTime string `json:"current_time"` + Timezone string `json:"timezone"` +} + +type Request struct { + ClientIP string `json:"client_ip"` + UserAgent string `json:"user_agent"` + Method string `json:"method"` + Path string `json:"path"` +} + +type Endpoint struct { + Path string `json:"path"` + Method string `json:"method"` + Description string `json:"description"` +} + +type HealthResponse struct { + Status string `json:"status"` + Timestamp string `json:"timestamp"` + UptimeSeconds int `json:"uptime_seconds"` +} + +var startTime = time.Now().UTC() + +func getHostname() string { + h, err := os.Hostname() + if err != nil { + return "unknown" + } + return h +} + +func getPlatformVersion() string { + f, err := os.Open("/etc/os-release") + if err != nil { + return runtime.GOOS + } + defer f.Close() + s := bufio.NewScanner(f) + for s.Scan() { + line := strings.TrimSpace(s.Text()) + if strings.HasPrefix(line, "PRETTY_NAME=") { + v := strings.TrimPrefix(line, "PRETTY_NAME=") + v = strings.Trim(v, "\"") + return v + } + } + return runtime.GOOS +} + +func uptime() (seconds int, human 
string) { + d := time.Since(startTime) + sec := int(d.Seconds()) + h := sec / 3600 + m := (sec % 3600) / 60 + return sec, fmt.Sprintf("%d hours, %d minutes", h, m) +} + +func nowUTC() string { + return time.Now().UTC().Format("2006-01-02T15:04:05.000Z") +} + +func clientIP(r *http.Request) string { + ra := r.RemoteAddr + if h := r.Header.Get("X-Forwarded-For"); h != "" { + if idx := strings.Index(h, ","); idx > 0 { + ra = strings.TrimSpace(h[:idx]) + } else { + ra = strings.TrimSpace(h) + } + return ra + } + host, _, err := net.SplitHostPort(ra) + if err != nil { + return ra + } + return host +} + +func mainHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + log.Printf("Handling GET /") + + sec, human := uptime() + info := ServiceInfo{ + Service: Service{ + Name: "devops-info-service", + Version: "1.0.0", + Description: "DevOps course info service", + Framework: "net/http", + }, + System: System{ + Hostname: getHostname(), + Platform: runtime.GOOS, + PlatformVersion: getPlatformVersion(), + Architecture: runtime.GOARCH, + CPUCount: runtime.NumCPU(), + GoVersion: strings.TrimPrefix(runtime.Version(), "go"), + }, + Runtime: Runtime{ + UptimeSeconds: sec, + UptimeHuman: human, + CurrentTime: nowUTC(), + Timezone: "UTC", + }, + Request: Request{ + ClientIP: clientIP(r), + UserAgent: r.Header.Get("User-Agent"), + Method: r.Method, + Path: r.URL.Path, + }, + Endpoints: []Endpoint{ + {Path: "/", Method: "GET", Description: "Service information"}, + {Path: "/health", Method: "GET", Description: "Health check"}, + }, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(info) +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + log.Printf("Handling GET /health") + sec, _ := uptime() + resp := HealthResponse{ + Status: "healthy", + Timestamp: nowUTC(), + UptimeSeconds: sec, + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = 
json.NewEncoder(w).Encode(resp) +} + +func main() { + port := os.Getenv("PORT") + if port == "" { + port = "5000" + } + host := os.Getenv("HOST") + if host == "" { + host = "0.0.0.0" + } + addr := net.JoinHostPort(host, port) + log.Printf("Starting DevOps Info Service on %s", addr) + http.HandleFunc("/health", healthHandler) + http.HandleFunc("/", mainHandler) + log.Fatal(http.ListenAndServe(addr, nil)) +} diff --git a/app_go/main_test.go b/app_go/main_test.go new file mode 100644 index 0000000000..43f4170a9a --- /dev/null +++ b/app_go/main_test.go @@ -0,0 +1,67 @@ +package main + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func setupTestServer() *httptest.Server { + mux := http.NewServeMux() + mux.HandleFunc("/health", healthHandler) + mux.HandleFunc("/", mainHandler) + return httptest.NewServer(mux) +} + +func TestHealthEndpoint(t *testing.T) { + server := setupTestServer() + defer server.Close() + + resp, err := http.Get(server.URL + "/health") + if err != nil { + t.Fatalf("GET /health: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Errorf("expected status 200, got %d", resp.StatusCode) + } + + if ct := resp.Header.Get("Content-Type"); ct != "application/json" { + t.Errorf("expected Content-Type application/json, got %s", ct) + } +} + +func TestMainEndpoint(t *testing.T) { + server := setupTestServer() + defer server.Close() + + resp, err := http.Get(server.URL + "/") + if err != nil { + t.Fatalf("GET /: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Errorf("expected status 200, got %d", resp.StatusCode) + } + + if ct := resp.Header.Get("Content-Type"); ct != "application/json" { + t.Errorf("expected Content-Type application/json, got %s", ct) + } +} + +func Test404Endpoint(t *testing.T) { + server := setupTestServer() + defer server.Close() + + resp, err := http.Get(server.URL + "/nonexistent") + if err != nil { + t.Fatalf("GET /nonexistent: %v", err) + } + defer 
resp.Body.Close() + + if resp.StatusCode != http.StatusNotFound { + t.Errorf("expected status 404, got %d", resp.StatusCode) + } +} diff --git a/app_python/.dockerignore b/app_python/.dockerignore new file mode 100644 index 0000000000..6c5dd5c36e --- /dev/null +++ b/app_python/.dockerignore @@ -0,0 +1,34 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +*.log +venv/ +.venv/ +.env +.env.* + +# Version control +.git/ +.gitignore + +# IDE / editor +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +docs/ +*.md +README* + +tests/ +pytest.ini +.pytest_cache/ +.coverage +htmlcov/ diff --git a/app_python/.gitignore b/app_python/.gitignore new file mode 100644 index 0000000000..3e627abc88 --- /dev/null +++ b/app_python/.gitignore @@ -0,0 +1,14 @@ +# Python +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ + +# IDE +.vscode/ +.idea/ + +# OS +.DS_Store + diff --git a/app_python/Dockerfile b/app_python/Dockerfile new file mode 100644 index 0000000000..99545fe69f --- /dev/null +++ b/app_python/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3.13-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +RUN groupadd -r appgroup && useradd -r -g appgroup appuser + +RUN mkdir -p /data && chown appuser:appgroup /data + +COPY app.py . + +RUN chown -R appuser:appgroup /app + +USER appuser + +EXPOSE 5000 + +ENV PORT=5000 + +CMD ["python", "app.py"] diff --git a/app_python/README.md b/app_python/README.md new file mode 100644 index 0000000000..47b7641c94 --- /dev/null +++ b/app_python/README.md @@ -0,0 +1,115 @@ +## DevOps Info Service (Python) + +[![Python CI](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg)](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/python-ci.yml) + +### Overview + +This is a simple **DevOps Info Service** implemented in Python using **Flask**. 
+It exposes HTTP endpoints that return detailed information about the service, the underlying system, and its runtime environment. +The service will be used as a foundation for future labs (containerization, CI/CD, monitoring, and more). + +### Prerequisites + +- **Python**: 3.11 or newer +- **Pip**: Python package manager +- Recommended: virtual environment (`venv`) + +### Installation + +```bash +cd app_python + +python -m venv venv +source venv/bin/activate + +pip install -r requirements.txt +``` + +### Running the Application + +Default configuration (host `0.0.0.0`, port `5000`): + +```bash +python app.py +``` + +Custom configuration using environment variables: + +```bash +PORT=8080 python app.py + +HOST=127.0.0.1 PORT=3000 DEBUG=true python app.py +``` + +### API Endpoints + +- `GET /` + - Returns service metadata, system information, runtime information, **persisted visit total** (`visits_total`), optional **config** (from env + `/config/config.json` when mounted), request details, and a list of available endpoints. Each request to `/` **increments** the visit counter stored at **`VISITS_DATA_PATH`** (default `/data/visits`). +- `GET /visits` + - Returns the current persisted visit total **without** incrementing, the resolved data file path, and a timestamp. +- `GET /health` + - Simple health check returning service status and uptime. 
+ +### Configuration + +The application can be configured using the following environment variables: + +| Variable | Default | Description | +|---------|-----------|--------------------------------------| +| `HOST` | `0.0.0.0` | Address to bind the HTTP server to | +| `PORT` | `5000` | Port to listen on | +| `DEBUG` | `False` | Enable Flask debug mode if `true` | +| `VISITS_DATA_PATH` | `/data/visits` | Filesystem path for the visit counter file | +| `LOG_FORMAT` | (text) | Set to `json` for structured logs | +| `APP_CONFIG_ENV`, `LOG_LEVEL`, `FEATURE_DEBUG` | (unset) | Optional; injected via Kubernetes ConfigMap in Lab 12 | + +Examples: + +```bash +HOST=127.0.0.1 PORT=8000 python app.py +DEBUG=true python app.py +``` + +### Docker + +The application can be run as a Docker container. + +**Build the image locally:** + +```bash +docker build -t devops-info-service . +``` + +**Run a container:** + +```bash +docker run -p 5000:5000 devops-info-service +``` + +To **persist the visit counter** across container restarts, mount a volume on `/data` and optionally set `VISITS_DATA_PATH` (the image creates `/data` owned by the app user): + +```bash +docker run -p 5000:5000 -v devops-visits:/data -e VISITS_DATA_PATH=/data/visits devops-info-service +``` + +**Docker Compose (monitoring stack):** `monitoring/docker-compose.yml` binds `./data` to `/data` for the `app-python` service so `cat monitoring/data/visits` reflects the counter on the host. + +Map the container port (5000) to a host port of your choice: `-p HOST_PORT:5000` (for example `-p 8080:5000`). +Override `PORT` or `HOST` with environment variables if needed. 
+ +**Pull from Docker Hub:** + +```bash +docker pull jambulancia/devops-info-service +docker run -p 5000:5000 jambulancia/devops-info-service +``` + +### Testing + +```bash +cd app_python +python -m venv venv +source venv/bin/activate +pip install -r requirements-dev.txt +pytest tests/ -v +``` diff --git a/app_python/app.py b/app_python/app.py new file mode 100644 index 0000000000..e47676bbbd --- /dev/null +++ b/app_python/app.py @@ -0,0 +1,379 @@ +""" +DevOps Info Service +Main Flask application module. +""" + +import json +import logging +import os +import platform +import socket +import tempfile +import threading +import time +from datetime import datetime, timezone + +from flask import Flask, g, jsonify, request +from prometheus_client import ( + CONTENT_TYPE_LATEST, + Counter, + Gauge, + Histogram, + generate_latest, +) + +USE_JSON_LOGGING = os.getenv("LOG_FORMAT", "").lower() == "json" + +app = Flask(__name__) + +# Configuration +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", 5000)) +DEBUG = os.getenv("DEBUG", "False").lower() == "true" + +# Application start time (for uptime calculation) +START_TIME = datetime.now(timezone.utc) + +# Logging configuration +if USE_JSON_LOGGING: + from pythonjsonlogger import jsonlogger + + handler = logging.StreamHandler() + formatter = jsonlogger.JsonFormatter() + handler.setFormatter(formatter) + logging.root.handlers = [handler] + logging.root.setLevel(logging.INFO) + logger = logging.getLogger(__name__) +else: + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + logger = logging.getLogger(__name__) + +# Prometheus metrics (RED method: Rate, Errors, Duration) +http_requests_total = Counter( + "http_requests_total", + "Total HTTP requests", + ["method", "endpoint", "status"], +) +http_request_duration_seconds = Histogram( + "http_request_duration_seconds", + "HTTP request duration in seconds", + ["method", "endpoint"], + buckets=(0.005, 0.01, 
0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0), +) +http_requests_in_progress = Gauge( + "http_requests_in_progress", + "HTTP requests currently being processed", +) +endpoint_calls = Counter( + "devops_info_endpoint_calls", + "Endpoint calls by endpoint", + ["endpoint"], +) +system_info_duration = Histogram( + "devops_info_system_collection_seconds", + "System info collection duration in seconds", + buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5), +) + + +def _normalize_endpoint(path: str) -> str: + """Normalize path for low-cardinality metrics.""" + if path in ("/", "/health", "/metrics", "/visits"): + return path + return "other" + + +_visits_lock = threading.Lock() + + +def _visits_path() -> str: + return os.environ.get("VISITS_DATA_PATH", "/data/visits") + + +def _read_visits_unlocked() -> int: + path = _visits_path() + try: + with open(path, encoding="utf-8") as f: + return int((f.read() or "0").strip() or "0") + except (FileNotFoundError, ValueError, OSError): + return 0 + + +def _write_visits_atomic(n: int) -> None: + path = _visits_path() + parent = os.path.dirname(path) or "." 
+ os.makedirs(parent, exist_ok=True) + fd, tmp = tempfile.mkstemp( + dir=parent, prefix=".visits_", suffix=".tmp", text=True + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(str(n)) + os.replace(tmp, path) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +def increment_visits() -> int: + """Increment persisted visit counter; returns new total.""" + with _visits_lock: + n = _read_visits_unlocked() + 1 + _write_visits_atomic(n) + return n + + +def get_visits() -> int: + """Return current persisted visit total.""" + with _visits_lock: + return _read_visits_unlocked() + + +def load_config_file() -> dict | None: + """Return parsed /config/config.json if present (Kubernetes ConfigMap mount).""" + path = "/config/config.json" + if not os.path.isfile(path): + return None + try: + with open(path, encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + logger.warning("Could not read or parse %s", path) + return None + + +# Startup log +logger.info( + "Application starting", + extra={"host": HOST, "port": PORT, "debug": DEBUG} if USE_JSON_LOGGING else {}, +) +logger.info( + "Visit counter path %s (initial count=%s)", + _visits_path(), + get_visits(), +) + + +@app.before_request +def log_request(): + """Log incoming request and record metrics start time.""" + g._request_start = time.perf_counter() + http_requests_in_progress.inc() + if USE_JSON_LOGGING: + logger.info( + "Request received", + extra={ + "method": request.method, + "path": request.path, + "client_ip": request.remote_addr or "unknown", + }, + ) + else: + logger.info("Request: %s %s from %s", request.method, request.path, request.remote_addr) + + +@app.after_request +def log_response(response): + """Log response status and record Prometheus metrics.""" + try: + if hasattr(g, "_request_start"): + duration = time.perf_counter() - g._request_start + endpoint = _normalize_endpoint(request.path) + http_requests_total.labels( + 
method=request.method, + endpoint=endpoint, + status=str(response.status_code), + ).inc() + http_request_duration_seconds.labels( + method=request.method, + endpoint=endpoint, + ).observe(duration) + endpoint_calls.labels(endpoint=endpoint).inc() + finally: + http_requests_in_progress.dec() + + if USE_JSON_LOGGING: + logger.info( + "Response sent", + extra={ + "method": request.method, + "path": request.path, + "status_code": response.status_code, + "client_ip": request.remote_addr or "unknown", + }, + ) + return response + + +def get_system_info() -> dict: + """Collect basic system information.""" + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.version(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count(), + "python_version": platform.python_version(), + } + + +def get_uptime() -> dict: + """Calculate uptime in seconds and human-readable form.""" + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + return { + "uptime_seconds": seconds, + "uptime_human": f"{hours} hours, {minutes} minutes", + } + + +def get_request_info() -> dict: + """Extract request-related information.""" + user_agent = request.headers.get("User-Agent") or request.headers.get( + "user-agent" + ) + return { + "client_ip": request.remote_addr, + "user_agent": user_agent, + "method": request.method, + "path": request.path, + } + + +@app.route("/", methods=["GET"]) +def index(): + """Main endpoint — service, system, runtime, and request information.""" + logger.info("Handling / request") + + visits_total = increment_visits() + uptime_info = get_uptime() + + # Track system info collection duration + with system_info_duration.time(): + system_info = get_system_info() + + response = { + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask", + 
}, + "system": system_info, + "runtime": { + "uptime_seconds": uptime_info["uptime_seconds"], + "uptime_human": uptime_info["uptime_human"], + "current_time": datetime.now(timezone.utc).isoformat(), + "timezone": "UTC", + }, + "visits_total": visits_total, + "config": { + "environment": os.environ.get("APP_CONFIG_ENV", "local"), + "log_level": os.environ.get("LOG_LEVEL", "INFO"), + "feature_debug": os.environ.get("FEATURE_DEBUG", "false"), + "file": load_config_file(), + }, + "request": get_request_info(), + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"}, + {"path": "/visits", "method": "GET", "description": "Persisted visit counter"}, + {"path": "/metrics", "method": "GET", "description": "Prometheus metrics"}, + ], + } + + logger.debug("Response payload for / endpoint generated") + return jsonify(response) + + +@app.route("/metrics", methods=["GET"]) +def metrics(): + """Prometheus metrics endpoint.""" + return generate_latest(), 200, {"Content-Type": CONTENT_TYPE_LATEST} + + +@app.route("/visits", methods=["GET"]) +def visits(): + """Return current persisted root-path visit total (without incrementing).""" + total = get_visits() + return ( + jsonify( + { + "visits_total": total, + "data_path": _visits_path(), + "timestamp": datetime.now(timezone.utc).isoformat(), + } + ), + 200, + ) + + +@app.route("/health", methods=["GET"]) +def health(): + """Health check endpoint.""" + logger.info("Handling /health request") + uptime_info = get_uptime() + response = { + "status": "healthy", + "timestamp": datetime.now(timezone.utc).isoformat(), + "uptime_seconds": uptime_info["uptime_seconds"], + } + return jsonify(response), 200 + + +@app.errorhandler(404) +def not_found(error): + """Handle 404 Not Found errors.""" + if USE_JSON_LOGGING: + logger.warning( + "404 Not Found", + extra={"method": request.method, "path": request.path}, + ) + else: + logger.warning("404 
Not Found: %s %s", request.method, request.path) + return ( + jsonify( + { + "error": "Not Found", + "message": "Endpoint does not exist", + "path": request.path, + } + ), + 404, + ) + + +@app.errorhandler(500) +def internal_error(error): + """Handle 500 Internal Server Error.""" + logger.exception( + "500 Internal Server Error", + extra={"error": str(error)} if USE_JSON_LOGGING else {}, + ) + return ( + jsonify( + { + "error": "Internal Server Error", + "message": "An unexpected error occurred", + } + ), + 500, + ) + + +def main(): + """Application entrypoint.""" + logger.info("Starting DevOps Info Service on %s:%s (debug=%s)", HOST, PORT, DEBUG) + app.run(host=HOST, port=PORT, debug=DEBUG) + + +if __name__ == "__main__": + main() diff --git a/app_python/docs/LAB01.md b/app_python/docs/LAB01.md new file mode 100644 index 0000000000..33312a0883 --- /dev/null +++ b/app_python/docs/LAB01.md @@ -0,0 +1,150 @@ +## LAB01 — DevOps Info Service (Python) + +### 1. Framework Selection + +**Chosen Framework:** Flask + +| Criterion | Flask | FastAPI | Django | +|------------------------|----------------------------------|----------------------------------------|----------------------------------------| +| Learning curve | Very beginner-friendly | Moderate (type hints, async) | Steeper (full framework) | +| Use case fit | Simple APIs and microservices | High-performance APIs | Large, full-featured web apps | +| Ecosystem / extensions | Mature ecosystem, many examples | Great docs, built-in OpenAPI docs | Includes ORM, admin, auth, templates | +| Setup complexity | Minimal | Minimal | Higher (project + apps structure) | +| For this lab | Ideal for quick REST services | Slightly more complex than necessary | Overkill | + +**Why Flask?** + +- The lab only needs two simple HTTP endpoints and JSON responses. +- Flask is lightweight, easy to understand, and perfect for a small service that will grow over time. 
+- There is a lot of learning material and community support, which is helpful for beginners. + +### 2. Best Practices Applied + +- **Clean Code Organization** + - Clear function names such as `get_system_info()`, `get_uptime()`, and `get_request_info()`. + - Configuration values (`HOST`, `PORT`, `DEBUG`) are defined at the top of `app.py`. + - A `main()` function is used as the entrypoint to keep `if __name__ == "__main__":` minimal. + +- **PEP 8 Compliance** + - Imports are grouped (standard library first, then third-party). + - Snake_case is used for function and variable names. + - Line lengths and spacing follow PEP 8 conventions. + +- **Error Handling** + - Custom handlers for `404` and `500` errors return JSON responses: + - `404` includes an error message and the invalid path. + - `500` returns a generic error message without leaking internals. + +- **Logging** + - Configured via `logging.basicConfig` with timestamp, logger name, level, and message. + - Logs when the application starts and when requests to `/` and `/health` are handled. + - Errors and 500s are logged with stack traces for easier debugging. + +### 3. API Documentation + +#### `GET /` + +- **Description:** Returns service, system, runtime, and request information, plus a list of available endpoints. 
+- **Example Request:** + +```bash +curl http://localhost:5000/ +``` + +- **Example Response (truncated):** + +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask" + }, + "system": { + "hostname": "my-laptop", + "platform": "Linux", + "platform_version": "Ubuntu 24.04", + "architecture": "x86_64", + "cpu_count": 8, + "python_version": "3.11.0" + }, + "runtime": { + "uptime_seconds": 12, + "uptime_human": "0 hours, 0 minutes", + "current_time": "2026-01-27T14:30:00.000Z", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/7.81.0", + "method": "GET", + "path": "/" + }, + "endpoints": [ + { "path": "/", "method": "GET", "description": "Service information" }, + { "path": "/health", "method": "GET", "description": "Health check" } + ] +} +``` + +#### `GET /health` + +- **Description:** Simple health check used for readiness/liveness probes. +- **Example Request:** + +```bash +curl http://localhost:5000/health +``` + +- **Example Response:** + +```json +{ + "status": "healthy", + "timestamp": "2026-01-27T14:30:00.000Z", + "uptime_seconds": 42 +} +``` + +### 4. Testing Evidence + +**Manual Testing Commands** + +From the `app_python` directory: + +```bash +python app.py + +# In another terminal +curl http://localhost:5000/ | jq +curl http://localhost:5000/health | jq +``` + +- `curl` fetches the JSON responses. +- `jq` pretty-prints the JSON output for easier reading. + +**Screenshots (to be added by you):** + +Place the following screenshots in `app_python/docs/screenshots/`: + +- `01-main-endpoint.png` — Browser or terminal showing the full JSON from `GET /`. +- `02-health-check.png` — Response from `GET /health`. +- `03-formatted-output.png` — Pretty-printed JSON output (e.g., using `jq` or browser dev tools). + +### 5. Challenges & Solutions + +- **Challenge:** Calculating and formatting uptime correctly. 
+ - **Solution:** Store a global `START_TIME` when the app starts and compute the time difference on each request, returning both seconds and a human-readable `"{hours} hours, {minutes} minutes"` string. + +- **Challenge:** Making the app configurable without changing code. + - **Solution:** Read `HOST`, `PORT`, and `DEBUG` from environment variables with sensible defaults, so the same code can run in different environments. + +### 6. GitHub Community + +- **Why starring repositories matters:** + Starring repositories is a lightweight way to bookmark useful projects and signal appreciation to maintainers. A higher star count helps good projects become more visible, attract contributors, and build trust in the open-source community. + +- **How following developers helps:** + Following professors, TAs, and classmates makes it easier to discover new projects, see how others solve problems, and stay connected with your learning community. Over time this builds a professional network and exposes you to real-world development practices. + diff --git a/app_python/docs/LAB02.md b/app_python/docs/LAB02.md new file mode 100644 index 0000000000..0fb0c3348c --- /dev/null +++ b/app_python/docs/LAB02.md @@ -0,0 +1,136 @@ +## LAB02 — Docker Containerization (Python) + +### 1. Docker Best Practices Applied + +#### Non-root user +- **What:** Created a dedicated `appuser` in `appgroup` and switched to it with `USER appuser`. +- **Why:** Running as root inside a container is a security risk. If the app is compromised, an attacker would have root access. Non-root users limit the blast radius and follow the principle of least privilege. + +```dockerfile +RUN groupadd -r appgroup && useradd -r -g appgroup appuser +# ... copy files, chown ... +USER appuser +``` + +#### Specific base image version +- **What:** Use `python:3.13-slim` instead of `python:3` or `python:latest`. +- **Why:** Pinned versions ensure reproducible builds and avoid surprises when base images change. 
`slim` is smaller than the full image (no build tools, fewer packages) while still being easy to work with. + +#### Layer ordering for caching +- **What:** Copy `requirements.txt` first, run `pip install`, then copy `app.py`. +- **Why:** Dependencies change less often than application code. Docker caches each layer; if only `app.py` changes, the `pip install` layer is reused, making rebuilds faster. + +```dockerfile +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY app.py . +``` + +#### Only copy necessary files +- **What:** Copy only `requirements.txt` and `app.py`; use `.dockerignore` to exclude the rest. +- **Why:** Smaller build context speeds up uploads to the Docker daemon. Fewer files in the image reduce attack surface and image size. + +#### .dockerignore +- **What:** Exclude `__pycache__`, `venv`, `.git`, `docs`, `tests`, IDE configs, etc. +- **Why:** These files are not needed at runtime and would bloat the build context and potentially the image. Excluding them speeds up builds and keeps images lean. + +#### PYTHONDONTWRITEBYTECODE and PYTHONUNBUFFERED +- **What:** Set `ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1`. +- **Why:** Avoid writing `.pyc` files and disable stdout/stderr buffering so logs appear immediately in `docker logs`. + +--- + +### 2. Image Information & Decisions + +| Decision | Choice | Justification | +|----------|--------|---------------| +| Base image | `python:3.13-slim` | Latest stable Python, slim variant for smaller size without sacrificing compatibility | +| Final image size | ~150–180 MB | Typical for python:3.13-slim + Flask; acceptable for a dev/lab service | +| Layer structure | Base → deps → user → app → USER | Dependencies first for cache, user setup before app copy, USER last | + +**Optimization choices:** +- `--no-cache-dir` with pip to avoid keeping package cache in the image +- Minimal COPYs (only `requirements.txt` and `app.py`) +- No build tools or unnecessary packages + +--- + +### 3.
Build & Run Process + +**Build:** + +```bash +cd app_python +docker build -t devops-info-service . +``` + +![Docker build output](screenshots/docker_build.png) + +**Run container:** + +```bash +docker run -d -p 5000:5000 --name devops-app devops-info-service +``` + +![Docker run output](screenshots/docker_run.png) + +**Test endpoints:** + +```bash +curl http://localhost:5000/ +curl http://localhost:5000/health +``` + +**Docker Hub:** + +![Docker Hub repository](screenshots/dockerhub.png) + +**Pull and run from Docker Hub** (verifies image is publicly accessible): + +```bash +docker pull jambulancia/devops-info-service:latest +docker run -d -p 5000:5000 --name devops-app jambulancia/devops-info-service:latest +curl http://localhost:5000/health +``` + +![Pull and run from Docker Hub](screenshots/run_dockerhub.png) + +**Repository URL:** https://hub.docker.com/r/jambulancia/devops-info-service + +**Tagging strategy:** Image tagged as `jambulancia/devops-info-service:latest` — `latest` for the current stable build; version tags (e.g. `1.0.0`) can be added later for releases. + +--- + +### 4. Technical Analysis + +**Why does the Dockerfile work this way?** +- The app binds to `0.0.0.0` (all interfaces) by default, so it is reachable from outside the container. +- Port 5000 is exposed; `-p 5000:5000` maps host 5000 to container 5000. +- The `USER` directive ensures the process runs as `appuser`, not root. + +**What if layer order changed?** +- If we copied `app.py` before `pip install`, any code change would invalidate the cache for `pip install`, forcing a full reinstall on every build. +- Putting `USER` before `COPY` would cause permission errors unless we copy as root and then chown (which we do). + +**Security considerations:** +- Non-root user reduces impact of container escape or app compromise. +- Minimal base image and fewer files shrink the attack surface. +- No secrets or credentials in the image. 
+ +**How does .dockerignore improve the build?** +- Reduces the amount of data sent to the Docker daemon during `docker build`. +- Avoids including `.git`, `venv`, or other large/unnecessary directories. +- Faster builds and cleaner images. + +--- + +### 5. Challenges & Solutions + +**Challenge:** Ensuring the app listens on `0.0.0.0` inside the container. +- **Solution:** The app already uses `HOST = os.getenv("HOST", "0.0.0.0")`, so it binds to all interfaces by default. No change needed. + +**Challenge:** Non-root user and file ownership. +- **Solution:** Create the user, copy files as root, run `chown -R appuser:appgroup /app`, then `USER appuser` so the app can read its files. + +**Challenge:** Keeping the image small. +- **Solution:** Use `python:3.13-slim`, `--no-cache-dir` for pip, and `.dockerignore` to exclude dev artifacts. diff --git a/app_python/docs/LAB03.md b/app_python/docs/LAB03.md new file mode 100644 index 0000000000..288378bd76 --- /dev/null +++ b/app_python/docs/LAB03.md @@ -0,0 +1,111 @@ +# LAB03 — Continuous Integration (CI/CD) + +## 1. Overview + +### Testing Framework + +**pytest** — Chosen for its simple syntax, strong fixture support, and wide adoption. It integrates well with Flask's test client and supports clear assertions. + +### Test Coverage + +- **GET /** — Status 200, JSON structure, required fields (service, system, runtime, request, endpoints), data types +- **GET /health** — Status 200, `status=healthy`, `uptime_seconds`, `timestamp` +- **404** — Non-existent paths return 404 with error structure +- **Request context** — User-Agent and path reflected in response + +### CI Workflow Triggers + +- **Push** to `master`, `lab02`, `lab03` +- **Pull request** to `master`, `lab02`, `lab03` +- **Path filter:** Only when `app_python/**` or `.github/workflows/python-ci.yml` changes + +### Versioning Strategy + +**CalVer (Calendar Versioning)** — Format `YYYY.MM.DD` (e.g. `2026.02.04`). 
Chosen because this is a service deployed continuously rather than a library with breaking-change semantics. CalVer gives clear, date-based versions without manual tagging. + +--- + +## 2. Workflow Evidence + +| Item | Link / Evidence | +|------|-----------------| +| Successful workflow run | [GitHub Actions](https://github.com/abdughafforzoda/DevOps-Core-Course/actions/workflows/python-ci.yml) | +| Tests passing locally | See terminal output below | +| Docker image on Docker Hub | [jambulancia/devops-info-service](https://hub.docker.com/r/jambulancia/devops-info-service) | +| Status badge | In `app_python/README.md` | + +**Tests passing locally:** + +``` +============================= test session starts ============================== +platform linux -- Python 3.13.3, pytest-9.0.2 +collected 13 items + +tests/test_app.py::test_index_returns_200 PASSED +tests/test_app.py::test_index_returns_json PASSED +tests/test_app.py::test_index_service_structure PASSED +tests/test_app.py::test_index_system_structure PASSED +tests/test_app.py::test_index_runtime_structure PASSED +tests/test_app.py::test_index_request_structure PASSED +tests/test_app.py::test_index_endpoints_list PASSED +tests/test_app.py::test_health_returns_200 PASSED +tests/test_app.py::test_health_returns_json PASSED +tests/test_app.py::test_health_structure PASSED +tests/test_app.py::test_404_nonexistent_endpoint PASSED +tests/test_app.py::test_404_wrong_method PASSED +tests/test_app.py::test_index_request_has_client_ip PASSED + +============================== 13 passed in 0.06s ============================== +``` + +--- + +## 3. Best Practices Implemented + +- **Dependency caching** — `actions/setup-python` with `cache: 'pip'` caches pip packages; speeds up jobs by ~30–60s on cache hit. +- **Concurrency** — `concurrency` cancels outdated workflow runs when new commits are pushed. +- **Path filters** — CI runs only when Python app files change, reducing unnecessary runs.
+- **Job dependencies** — Docker job runs only after tests pass (`needs: test`). +- **Conditional Docker push** — Images pushed only on `push` (not on `pull_request`). +- **Snyk** — Vulnerability scan with `continue-on-error: true` so missing `SNYK_TOKEN` does not fail CI. Add `SNYK_TOKEN` for full scanning. +- **Docker layer caching** — `cache-from/cache-to: type=gha` reuses build layers between runs. + +--- + +## 4. Key Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| **Versioning** | CalVer | Continuous deployment; date-based releases without manual version bumps. | +| **Docker tags** | `YYYY.MM.DD` + `latest` | E.g. `jambulancia/devops-info-service:2026.02.04` and `:latest`. | +| **Triggers** | Push + PR to master, lab02, lab03 | Validate changes before and after merge; runs on relevant branches. | +| **Test coverage** | All endpoints, structure, types | Ensures JSON shape and required fields; omits 500 handler due to needing forced failure. | +| **Linter** | Ruff | Fast, modern linter with good defaults. | + +--- + +## 6. Bonus: Multi-App CI with Path Filters + +A separate **Go CI** workflow (`.github/workflows/go-ci.yml`) runs when `app_go/**` changes. Both workflows use path filters so that: + +- Changes to `app_python/` → only Python CI runs +- Changes to `app_go/` → only Go CI runs +- Changes to both → both run in parallel +- Changes to `docs/` or `labs/` → neither runs + +**Benefits:** Fewer unnecessary runs, faster feedback, and lower Actions usage. + +--- + +## 7. Setup Required + +Before the workflow runs correctly: + +1. **Docker Hub** — Add secrets in GitHub: `Settings → Secrets and variables → Actions`: + - `DOCKERHUB_USERNAME`: your Docker Hub username + - `DOCKERHUB_TOKEN`: Docker Hub access token (create at hub.docker.com) + +2. 
**Snyk (optional)** — For security scanning: + - Create account at snyk.io + - Add `SNYK_TOKEN` as a GitHub secret + - Without it, the Snyk step is skipped (`continue-on-error: true`) diff --git a/app_python/docs/screenshots/01-main-endpoint.png b/app_python/docs/screenshots/01-main-endpoint.png new file mode 100644 index 0000000000..8d04514dcd Binary files /dev/null and b/app_python/docs/screenshots/01-main-endpoint.png differ diff --git a/app_python/docs/screenshots/02-health-check.png b/app_python/docs/screenshots/02-health-check.png new file mode 100644 index 0000000000..1147af9587 Binary files /dev/null and b/app_python/docs/screenshots/02-health-check.png differ diff --git a/app_python/docs/screenshots/03-formatted-output.png b/app_python/docs/screenshots/03-formatted-output.png new file mode 100644 index 0000000000..65dc6537b8 Binary files /dev/null and b/app_python/docs/screenshots/03-formatted-output.png differ diff --git a/app_python/docs/screenshots/docker_build.png b/app_python/docs/screenshots/docker_build.png new file mode 100644 index 0000000000..14b715f92a Binary files /dev/null and b/app_python/docs/screenshots/docker_build.png differ diff --git a/app_python/docs/screenshots/docker_run.png b/app_python/docs/screenshots/docker_run.png new file mode 100644 index 0000000000..c110c29eb8 Binary files /dev/null and b/app_python/docs/screenshots/docker_run.png differ diff --git a/app_python/docs/screenshots/dockerhub.png b/app_python/docs/screenshots/dockerhub.png new file mode 100644 index 0000000000..bdd833dfdf Binary files /dev/null and b/app_python/docs/screenshots/dockerhub.png differ diff --git a/app_python/docs/screenshots/run_dockerhub.png b/app_python/docs/screenshots/run_dockerhub.png new file mode 100644 index 0000000000..e55fb6e230 Binary files /dev/null and b/app_python/docs/screenshots/run_dockerhub.png differ diff --git a/app_python/pytest.ini b/app_python/pytest.ini new file mode 100644 index 0000000000..2476a922fb --- /dev/null +++ 
b/app_python/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +testpaths = tests +python_files = test_*.py +python_functions = test_* +addopts = -v diff --git a/app_python/requirements-dev.txt b/app_python/requirements-dev.txt new file mode 100644 index 0000000000..597f5f362d --- /dev/null +++ b/app_python/requirements-dev.txt @@ -0,0 +1,4 @@ +-r requirements.txt +pytest>=8.0.0 +pytest-cov>=4.1.0 +ruff>=0.8.0 diff --git a/app_python/requirements.txt b/app_python/requirements.txt new file mode 100644 index 0000000000..12c86f2483 --- /dev/null +++ b/app_python/requirements.txt @@ -0,0 +1,4 @@ +Flask==3.1.0 +python-json-logger==2.0.7 +prometheus-client==0.23.1 + diff --git a/app_python/tests/__init__.py b/app_python/tests/__init__.py new file mode 100644 index 0000000000..95e046e0e0 --- /dev/null +++ b/app_python/tests/__init__.py @@ -0,0 +1,6 @@ +""" +Test package for the DevOps Info Service. + +Unit tests for this application will be added in Lab 3. +""" + diff --git a/app_python/tests/conftest.py b/app_python/tests/conftest.py new file mode 100644 index 0000000000..1387412e13 --- /dev/null +++ b/app_python/tests/conftest.py @@ -0,0 +1,20 @@ +"""Pytest fixtures for DevOps Info Service tests.""" + +import os +import tempfile + +import pytest + +# Set before importing app — startup code reads the counter path at import time. 
+_test_visits_dir = tempfile.mkdtemp(prefix="visits_lab12_test_") +os.environ["VISITS_DATA_PATH"] = os.path.join(_test_visits_dir, "visits") + +from app import app # noqa: E402 + + +@pytest.fixture +def client(): + """Create a Flask test client.""" + app.config["TESTING"] = True + with app.test_client() as c: + yield c diff --git a/app_python/tests/test_app.py b/app_python/tests/test_app.py new file mode 100644 index 0000000000..0eb9c69322 --- /dev/null +++ b/app_python/tests/test_app.py @@ -0,0 +1,144 @@ +"""Unit tests for DevOps Info Service endpoints.""" + + +def test_index_returns_200(client): + """GET / returns 200 OK.""" + response = client.get("/") + assert response.status_code == 200 + + +def test_index_returns_json(client): + """GET / returns valid JSON.""" + response = client.get("/") + assert response.content_type == "application/json" + data = response.get_json() + assert data is not None + + +def test_index_service_structure(client): + """GET / includes required service metadata.""" + response = client.get("/") + data = response.get_json() + assert "service" in data + service = data["service"] + assert service["name"] == "devops-info-service" + assert service["version"] == "1.0.0" + assert service["description"] == "DevOps course info service" + assert service["framework"] == "Flask" + + +def test_index_system_structure(client): + """GET / includes system info with required fields.""" + response = client.get("/") + data = response.get_json() + assert "system" in data + system = data["system"] + assert "hostname" in system + assert "platform" in system + assert "platform_version" in system + assert "architecture" in system + assert "cpu_count" in system + assert "python_version" in system + assert isinstance(system["cpu_count"], (int, type(None))) + + +def test_index_runtime_structure(client): + """GET / includes runtime info with required fields.""" + response = client.get("/") + data = response.get_json() + assert "runtime" in data + runtime = 
data["runtime"] + assert "uptime_seconds" in runtime + assert "uptime_human" in runtime + assert "current_time" in runtime + assert runtime["timezone"] == "UTC" + assert isinstance(runtime["uptime_seconds"], int) + + +def test_index_request_structure(client): + """GET / includes request info from the client.""" + response = client.get("/", headers={"User-Agent": "test-agent/1.0"}) + data = response.get_json() + assert "request" in data + req = data["request"] + assert "client_ip" in req + assert "user_agent" in req + assert req["user_agent"] == "test-agent/1.0" + assert req["method"] == "GET" + assert req["path"] == "/" + + +def test_index_endpoints_list(client): + """GET / includes list of available endpoints.""" + response = client.get("/") + data = response.get_json() + assert "endpoints" in data + endpoints = data["endpoints"] + assert len(endpoints) >= 2 + paths = [e["path"] for e in endpoints] + assert "/" in paths + assert "/health" in paths + assert "/visits" in paths + + +def test_visits_returns_200(client): + """GET /visits returns 200 OK.""" + response = client.get("/visits") + assert response.status_code == 200 + + +def test_visits_increments_on_index(client): + """GET / increments persisted visit counter; GET /visits reflects it.""" + c1 = client.get("/visits").get_json()["visits_total"] + client.get("/") + c2 = client.get("/visits").get_json()["visits_total"] + assert c2 == c1 + 1 + + +def test_health_returns_200(client): + """GET /health returns 200 OK.""" + response = client.get("/health") + assert response.status_code == 200 + + +def test_health_returns_json(client): + """GET /health returns valid JSON.""" + response = client.get("/health") + assert response.content_type == "application/json" + data = response.get_json() + assert data is not None + + +def test_health_structure(client): + """GET /health includes status, timestamp, uptime_seconds.""" + response = client.get("/health") + data = response.get_json() + assert data["status"] == "healthy" 
+ assert "timestamp" in data + assert "uptime_seconds" in data + assert isinstance(data["uptime_seconds"], int) + + +def test_404_nonexistent_endpoint(client): + """GET /nonexistent returns 404 with error structure.""" + response = client.get("/nonexistent") + assert response.status_code == 404 + data = response.get_json() + assert "error" in data + assert data["error"] == "Not Found" + assert "path" in data + assert data["path"] == "/nonexistent" + + +def test_404_wrong_method(client): + """POST / returns 405 or 404 (method not allowed or not found).""" + response = client.post("/") + assert response.status_code in (404, 405) + + +def test_index_request_has_client_ip(client): + """Request info includes client_ip field.""" + response = client.get("/") + data = response.get_json() + assert "request" in data + assert "client_ip" in data["request"] diff --git a/k8s/ARGOCD.md b/k8s/ARGOCD.md new file mode 100644 index 0000000000..0e4f3a99a5 --- /dev/null +++ b/k8s/ARGOCD.md @@ -0,0 +1,215 @@ +# Lab 13 — Argo CD GitOps + +This document describes how Argo CD deploys the **`k8s/devops-python`** Helm chart from Git, how dev/prod are separated, and how to verify self-healing. Replace placeholders in manifests before use. + +## Prerequisites + +- A Kubernetes cluster (e.g. kind, minikube) with `kubectl` and `helm` configured. +- A **Git remote** (GitHub or other) hosting this repository so Argo CD can pull the chart. +- The Argo CD **Application** resources live in the **`argocd`** namespace; workloads deploy to **`dev`**, **`prod`**, or **`default`** as configured. + +## 1. 
Argo CD setup (Task 1) + +### Install (Helm) + +From the repository root: + +```bash +chmod +x k8s/argocd/install-argocd.sh +./k8s/argocd/install-argocd.sh +``` + +Or manually (as in the lab): + +```bash +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update +kubectl create namespace argocd +helm install argocd argo/argo-cd --namespace argocd +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s +``` + +**Verification:** `kubectl get pods -n argocd` should show all components Running / Completed. + +### UI access + +```bash +kubectl port-forward svc/argocd-server -n argocd 8080:443 +``` + +Open **https://localhost:8080** (accept the self-signed certificate). +User: **admin**. Initial password: + +```bash +kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d +echo +``` + +### CLI + +Install `argocd` per [Argo CD CLI installation](https://argo-cd.readthedocs.io/en/stable/cli_installation/), then: + +```bash +argocd login localhost:8080 --insecure +argocd version +argocd app list +``` + +--- + +## 2. Configure Git source in manifests + +In **`k8s/argocd/*.yaml`**, set: + +| Field | Set to | +|--------|--------| +| `spec.source.repoURL` | Your repository HTTPS URL, e.g. `https://github.com/<owner>/<repo>.git` | +| `spec.source.targetRevision` | Branch or tag, e.g. `main` or `lab13` | + +Argo CD must be able to reach the Git server. For **private** repositories, create credentials in Argo CD (UI: **Settings → Repositories**, or a Secret with the proper labels) — see [private repos](https://argo-cd.readthedocs.io/en/stable/user-guide/private-repositories/). + +--- + +## 3.
Application manifests (Task 2) + +| File | Purpose | +|------|---------| +| `k8s/argocd/application.yaml` | Single app, **`default`** namespace, `values.yaml`, **manual** sync (learning / Task 2) | +| `k8s/argocd/application-dev.yaml` | **`dev`** namespace, `values-dev.yaml`, **auto-sync** + **selfHeal** + **prune** | +| `k8s/argocd/application-prod.yaml` | **`prod`** namespace, `values-prod.yaml`, **manual** sync only | + +**Helm chart path in Git:** `k8s/devops-python` (path is relative to the repository root). + +Apply **either** the Task 2 base app **or** the dev/prod pair — do not duplicate the same environment with two different Application names. + +**Example (dev + prod only):** + +```bash +kubectl apply -f k8s/argocd/namespaces.yaml # optional; CreateNamespace also works +kubectl apply -f k8s/argocd/application-dev.yaml +kubectl apply -f k8s/argocd/application-prod.yaml +``` + +**Sync dev** (or use the “Sync” button in the UI): + +```bash +argocd app sync devops-python-dev +``` + +**Sync prod** (manual policy — required when you want to promote): + +```bash +argocd app sync devops-python-prod +argocd app get devops-python-prod +``` + +**GitOps check:** change a value in the chart (e.g. `replicaCount` in `values-dev.yaml`), commit and push; after the Git poll interval, dev should show **OutOfSync** and then **auto-sync** if the automated policy is enabled. + +**Default Argo CD Git poll interval** is on the order of **~3 minutes**; use **Refresh** in the UI, **`argocd app sync`**, or a [webhook](https://argo-cd.readthedocs.io/en/stable/operator-manual/webhook/) for faster feedback. + +--- + +## 4.
Multi-environment summary (Task 3) + +| | **dev** (`devops-python-dev`) | **prod** (`devops-python-prod`) | +|---|------------------------------|----------------------------------| +| Namespace | `dev` | `prod` | +| Values | `values-dev.yaml` | `values-prod.yaml` | +| Replicas (defaults in repo) | 1 | 5 | +| Sync | Automated + prune + selfHeal | Manual only | + +**Why manual prod?** + +Production changes are often gated by review, change windows, and rollback plans. **Manual sync** (or a pipeline that runs `argocd app sync` after approval) avoids every Git merge immediately changing live traffic. + +--- + +## 5. Self-healing and drift (Task 4) + +### 5.1 Self-heal (dev only) + +With **selfHeal: true**, Argo CD reconciles the live cluster back to the **Git-declared** desired state (Helm output after `helm template`), not to ad-hoc `kubectl` edits. + +**Example: manual scale (should be reverted on dev)** + +```bash +# Names follow Helm fullname: <release>-devops-python +kubectl scale deployment -n dev -l "app.kubernetes.io/name=devops-python" --replicas=5 +# Watch: replica count should return to the value from values-dev (e.g. 1) after the next sync/self-heal +kubectl get deploy -n dev -w +argocd app get devops-python-dev +``` + +Record **time before** the manual scale and **time after** the Rollout (or Deployment) spec matches Git again. Exact timing depends on sync interval and controller latency. + +**Difference from Kubernetes “self-healing”:** + +- The **Deployment / ReplicaSet** recreates **Pods** when a Pod is deleted (desired replicas unchanged). +- **Argo CD** corrects **drift in resource specs** (e.g. replica count, labels) to match **Git** when selfHeal is on. + +### 5.2 Pod deletion (Kubernetes behavior) + +```bash +kubectl delete pod -n dev -l app.kubernetes.io/name=devops-python +kubectl get pods -n dev -w +``` + +New pods are created by the **ReplicaSet**; this does not require an Argo CD sync.
+ +### 5.3 Configuration drift (label edit) + +```bash +kubectl label deployment -n dev -l app.kubernetes.io/name=devops-python argo-test=manual --overwrite +argocd app diff devops-python-dev +``` + +With self-heal, the label may be removed when Argo CD reconciles. **Prod** without auto-sync stays out of sync until you run a **Sync** and choose whether to apply. + +### 5.4 When does Argo CD sync? + +- **Auto-sync (dev):** on detected Git changes (after poll) and periodic reconciliation, plus self-heal correcting drift. +- **Manual (prod):** only when a user (or automation) runs sync / CI triggers `argocd app sync`. +- **Kubernetes** keeps Pod count and restarts per **Controller** logic independent of Argo CD. + +--- + +## 6. Optional screenshots (submission) + +Place evidence under e.g. **`k8s/evidence/`** (your course’s convention) or attach to the lab report: + +- Argo CD **Applications** list showing **dev** and **prod**. +- **Application details** for one app: sync status, health, revision. +- **Resource tree** or **Sync** result after a change. + +*Example* `kubectl` output to capture: + +```bash +kubectl get applications -n argocd +kubectl get pods -n dev +kubectl get pods -n prod +``` + +--- + +## 7. Bonus — ApplicationSet + +`k8s/argocd/applicationset.yaml` uses a **List** generator and **goTemplate** to emit two **Applications** (`devops-python-dev` and `devops-python-prod`) with **automated** sync only for **dev**. + +**Do not** apply this **and** `application-dev.yaml` / `application-prod.yaml` for the same names. + +**Apply ApplicationSet only:** + +```bash +kubectl apply -f k8s/argocd/applicationset.yaml +``` + +**Benefits:** one manifest for N environments, shared `repoURL` / `path`, parameters per list row. For more environments or clusters, consider **Matrix**, **Cluster**, or **Git** generators — see [ApplicationSet](https://argo-cd.readthedocs.io/en/stable/operator-manual/applicationset/). + +--- + +## 8. 
References + +- [Argo CD — Getting started](https://argo-cd.readthedocs.io/en/stable/getting_started/) +- [Application spec](https://argo-cd.readthedocs.io/en/stable/operator-manual/declarative-setup/) +- [Automated sync policy](https://argo-cd.readthedocs.io/en/stable/user-guide/auto_sync/) +- [Helm chart: argo/argo-cd](https://github.com/argoproj/argo-helm/tree/main/charts/argo-cd) diff --git a/k8s/CONFIGMAPS.md b/k8s/CONFIGMAPS.md new file mode 100644 index 0000000000..a5cfc37b9e --- /dev/null +++ b/k8s/CONFIGMAPS.md @@ -0,0 +1,129 @@ +# Lab 12 — ConfigMaps and persistent storage + +This document describes how the DevOps Info Service uses **ConfigMaps** (file + environment), a **PersistentVolumeClaim** for the visit counter, and how to verify behavior in a cluster. + +## Application changes + +### Visit counter + +- Each **GET /** increments a counter stored in a file whose path is set by **`VISITS_DATA_PATH`** (default `/data/visits`). +- The file is created on first write. Parent directories are created if needed. +- Writes use a **temporary file in the same directory** and **`os.replace`** for an atomic rename on the same filesystem. +- A **`threading.Lock`** serializes read–increment–write to avoid lost updates under concurrent requests. +- **GET /visits** returns the current total **without** incrementing, plus the resolved path and a timestamp. + +### Local Docker Compose + +`monitoring/docker-compose.yml` mounts **`./data` → `/data`** in the app container and sets **`VISITS_DATA_PATH=/data/visits`**. After several requests to `/`, you can inspect the file on the host: + +```bash +cat monitoring/data/visits +``` + +Restart the `app-python` container and call **GET /visits** (or **GET /**) again; the counter should continue from the last value. 
+ +## Helm: ConfigMaps + +The chart `k8s/devops-python` defines two ConfigMaps when **`config.enabled`** is true (see `values.yaml`): + +| Resource | Purpose | +|----------|---------| +| `{{ release-name }}-devops-python-file` | File **`config.json`** from `files/config.json` (via `.Files.Get`) | +| `{{ release-name }}-devops-python-env` | Keys **`APP_CONFIG_ENV`**, **`LOG_LEVEL`**, **`FEATURE_DEBUG`** from values | + +### File mount + +The Rollout (or Deployment) mounts the file ConfigMap at **`/config`**, so the app reads **`/config/config.json`** (see `load_config_file()` in `app_python/app.py`). The JSON is included in **GET /** under **`config.file`** when the file exists. + +### Environment variables + +**`envFrom`** includes **`configMapRef`** for the env ConfigMap (when **`config.injectEnv`** is true) **in addition to** any **`secretRef`** from Lab 11. The app exposes these in **GET /** under **`config.environment`**, **`config.log_level`**, and **`config.feature_debug`**. + +### Verification commands + +Replace `<release>` and `<namespace>` as appropriate. + +```bash +kubectl get configmap,pvc -n <namespace> -l app.kubernetes.io/instance=<release> +kubectl exec deploy/<release>-devops-python -n <namespace> -- cat /config/config.json +kubectl exec deploy/<release>-devops-python -n <namespace> -- printenv | grep -E '^(APP_CONFIG_ENV|LOG_LEVEL|FEATURE_DEBUG)=' +``` + +Example output shapes (your names and ages will differ): + +```text +NAME DATA AGE +configmap/<release>-devops-python-env 3 1m +configmap/<release>-devops-python-file 1 1m + +NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE +persistentvolumeclaim/<release>-devops-python-data Bound pvc-... 100Mi RWO standard 1m +``` + +## Persistent volume (visit counter) + +### PVC template + +`templates/pvc.yaml` creates a claim **`{{ fullname }}-data`** when **`persistence.enabled`** is true: + +- **Access mode:** `ReadWriteOnce` (one node can mount the volume read-write at a time). +- **Size:** `persistence.size` (default `100Mi`).
+- **Storage class:** omitted if `persistence.storageClass` is empty (cluster default). + +The Rollout mounts this PVC at **`/data`**, matching **`VISITS_DATA_PATH=/data/visits`** in chart values. + +### RWO and replicas + +With **ReadWriteOnce**, the volume can be mounted read-write by only **one node** at a time, so replicas scheduled on different nodes cannot all attach it. **kind** / **minikube** single-node setups often allow multiple pods on the same node, but that is not portable. For production-like multi-node clusters, either: + +- run **one replica** while using this PVC, or +- use **ReadWriteMany** / shared storage if you need multiple writers. + +`values-prod.yaml` sets **`replicaCount: 5`**; if you enable this PVC in a multi-node environment, scale down or switch storage class accordingly. + +### Persistence test (pod delete) + +1. Note **`visits_total`** from **GET /visits** (via port-forward or Ingress). +2. Delete only the pod (ReplicaSet / Rollout will recreate it): + + ```bash + kubectl delete pod -n <namespace> -l app.kubernetes.io/instance=<release> + ``` + +3. Wait for the new pod to become ready, then call **GET /visits** again. The count should match the pre-delete value. +4. Optional: `kubectl exec` and `cat /data/visits` before and after. + +## ConfigMap vs Secret + +| Use ConfigMap | Use Secret | +|---------------|------------| +| Non-sensitive settings (log level, feature flags, app name, JSON config) | Passwords, API keys, TLS private keys, database URLs with credentials | +| Readable by anyone with `get/list` on ConfigMaps in the namespace | Only base64-encoded (not encrypted) by default; restrict RBAC; consider encryption at rest / external secret managers | + +**Rule of thumb:** if disclosure would harm security or compliance, use a **Secret** (or Vault, as in Lab 11), not a ConfigMap. 
+ +## Bonus: reload and checksum annotations + +### Mounted ConfigMap updates + +For a **directory** mount (not **`subPath`**), the kubelet periodically refreshes files when the ConfigMap changes; total delay can be on the order of a minute or more depending on sync and cache behavior. See [Mounted ConfigMaps are updated automatically](https://kubernetes.io/docs/concepts/configuration/configmap/#mounted-configmaps-are-updated-automatically). + +### `subPath` + +If you mount a single file with **`subPath`**, the file is **not** updated when the ConfigMap changes, because it is effectively copied at mount time. Avoid **`subPath`** when you need hot reload of individual files from a ConfigMap; prefer mounting the whole directory. + +### Pod restart on config change (this chart) + +When **`configMapChecksum.enabled`** and **`config.enabled`** are true, the Pod template includes annotations: + +- **`checksum/config-file`** — SHA-256 of `files/config.json` +- **`checksum/config-env`** — SHA-256 of a string derived from **`config.environment`**, **`config.logLevel`**, and **`config.featureDebug`** + +Changing those inputs changes the annotation values, which updates the pod template and triggers a **rolling restart** so the app sees new env vars and file content immediately (without relying on kubelet sync alone). + +To simulate a change, run **`helm upgrade`** after editing **`values.yaml`** or **`files/config.json`**, then observe a new ReplicaSet and rolled pods: + +```bash +helm upgrade <release> ./k8s/devops-python -n <namespace> -f k8s/devops-python/values.yaml +kubectl rollout status deploy/<release>-devops-python -n <namespace> +``` diff --git a/k8s/HELM.md b/k8s/HELM.md new file mode 100644 index 0000000000..c89b912701 --- /dev/null +++ b/k8s/HELM.md @@ -0,0 +1,153 @@ +# Lab 10 — Helm (Chart + Multi-env + Hooks) + +This lab converts the Lab 9 Kubernetes manifests into a reusable Helm chart with environment values and lifecycle hooks. 
+ +Chart location: `k8s/devops-python/` + +**Lab 11 (Secrets & Vault):** see `k8s/SECRETS.md` for Kubernetes Secrets, Helm `templates/secrets.yaml`, and optional HashiCorp Vault Agent injection. + +--- + +## Task 1 — Helm Fundamentals (Evidence) + +Run and capture output: + +```bash +helm version +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm show chart prometheus-community/kube-prometheus-stack | head -50 +``` + +Write a short explanation: +- **Chart** = package of templates + values +- **Release** = installed instance of a chart in a namespace +- **Values** = configuration inputs that customize the templates + +--- + +## Task 2 — Create Your Helm Chart + +### Chart structure + +``` +k8s/devops-python/ +├── Chart.yaml +├── values.yaml +├── values-dev.yaml +├── values-prod.yaml +└── templates/ + ├── _helpers.tpl + ├── rollout.yaml + ├── statefulset.yaml + ├── service.yaml + ├── service-headless.yaml + ├── NOTES.txt + └── hooks/ + ├── pre-install-job.yaml + └── post-install-job.yaml +``` + +### Key templated values + +- **Init (Lab 16)**: `.Values.initContainers.*` — shared file under `/init` on the app container +- **ServiceMonitor (bonus)**: `.Values.serviceMonitor.*` (Prometheus Operator) +- **Workload**: `.Values.workload.kind` — `rollout` (Argo Rollouts, Lab 14) or `statefulSet` (Lab 15; `-f values-statefulset.yaml`) +- **Replicas**: `.Values.replicaCount` +- **Resources**: `.Values.resources` +- **Service**: `.Values.service.type`, `.Values.service.port`, `.Values.service.nodePort` +- **Health checks**: `.Values.probes.*` (enabled + timings) +- **Labels/names**: helper templates in `_helpers.tpl` + +--- + +## Task 3 — Multi-Environment Support + +- **Dev**: `values-dev.yaml` (1 replica, relaxed resources, NodePort) +- **Prod**: `values-prod.yaml` (5 replicas, bigger resources, LoadBalancer-ready) + +Install dev: + +```bash +helm install devops-dev k8s/devops-python -f 
k8s/devops-python/values-dev.yaml +``` + +Upgrade to prod: + +```bash +helm upgrade devops-dev k8s/devops-python -f k8s/devops-python/values-prod.yaml +``` + +Evidence: + +```bash +helm list +helm get values devops-dev +kubectl get rollout,sts,svc,pods +``` + +--- + +## Task 4 — Chart Hooks (Pre/Post Install Jobs) + +Hooks are implemented as Kubernetes Jobs with annotations: +- `pre-install` job: runs before install +- `post-install` job: runs after install + +Hook config lives in `values.yaml` under `hooks.*`. + +Verify hook resources: + +```bash +helm install --dry-run --debug hooktest k8s/devops-python | sed -n '1,220p' +kubectl get jobs +``` + +Evidence commands during real install: + +```bash +helm install hookrun k8s/devops-python +kubectl get jobs -w +kubectl logs job/$(kubectl get jobs -o name | grep hookrun | head -1 | cut -d/ -f2) +kubectl get jobs +``` + +Deletion policy is set so successful hooks are deleted (`hook-succeeded`) and old hook jobs are removed before recreation (`before-hook-creation`). + +--- + +## Task 5 — Testing & Validation + Operations + +Validate chart: + +```bash +helm lint k8s/devops-python +helm template devops k8s/devops-python | head -80 +helm install --dry-run --debug devops k8s/devops-python +``` + +Install / upgrade / rollback / uninstall: + +```bash +helm install devops k8s/devops-python +helm upgrade devops k8s/devops-python --set replicaCount=4 +helm history devops +helm rollback devops 1 +helm uninstall devops +``` + +App access: + +```bash +kubectl get svc +kubectl port-forward service/devops-dev-devops-python 8080:80 +curl http://localhost:8080/health +``` + +--- + +## Notes + +- Health checks are **not** removed; they are configurable via values. +- Avoid putting secrets in values.yaml; use Kubernetes Secrets + Helm values references (Lab 11). 
+ diff --git a/k8s/MONITORING.md b/k8s/MONITORING.md new file mode 100644 index 0000000000..465b8a6df3 --- /dev/null +++ b/k8s/MONITORING.md @@ -0,0 +1,120 @@ +# Lab 16 — Kubernetes monitoring & init containers + +This document supports **kube-prometheus-stack** (Prometheus Operator, Prometheus, Grafana, Alertmanager, **kube-state-metrics**, **node-exporter**) and the **init containers** / **ServiceMonitor** additions in the `devops-python` Helm chart. + +## 1. Stack components (Task 1) + +| Component | Role | +|-----------|------| +| **Prometheus Operator** | CRDs + controllers: `Prometheus`, `Alertmanager`, `ServiceMonitor`, `PodMonitor`, … | +| **Prometheus** | TSDB + scrapers; discovers targets via `ServiceMonitor` selectors | +| **Alertmanager** | Alert routing, silencing, receivers (Slack, email, …) | +| **Grafana** | Dashboards; often bundled with Prometheus datasource | +| **kube-state-metrics** | Exposes K8s object state as metrics (Deployments, Pods, …) | +| **node-exporter** | Node CPU, memory, disk, network (host metrics) | + +## 2. Installation (Helm) + +```bash +chmod +x k8s/monitoring/install-kube-prometheus-stack.sh +./k8s/monitoring/install-kube-prometheus-stack.sh +``` + +Or manually (match your course’s release name — default **`monitoring`**): + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm install monitoring prometheus-community/kube-prometheus-stack \ + --namespace monitoring --create-namespace +``` + +**Evidence (capture for your report):** + +```bash +kubectl get pods,svc -n monitoring +``` + +Expected: Prometheus, Grafana, Alertmanager, Operator, kube-state-metrics, node-exporter pods eventually **Running**. + +## 3. Grafana & dashboards (Task 2) + +**Port-forward** (release name `monitoring`): + +```bash +kubectl port-forward svc/monitoring-grafana -n monitoring 3000:80 +``` + +Open **http://localhost:3000**. 
If you installed with **`install-kube-prometheus-stack.sh`**, Grafana is set to **admin** / **admin** via `grafana.adminPassword`. For a cluster-default install without that flag, the password is often **prom-operator** or stored in a Secret: + +```bash +kubectl get secret monitoring-grafana -n monitoring -o jsonpath='{.data.admin-password}' | base64 -d +echo +``` + +**Alertmanager UI:** + +```bash +kubectl port-forward svc/monitoring-kube-prometheus-alertmanager -n monitoring 9093:9093 +``` + +### Dashboard questions (answer in Grafana — add screenshots to your submission) + +Use built-in charts such as **Kubernetes / Compute Resources / Namespace (Pods)**, **Kubernetes / Compute Resources / Pod**, **Node Exporter / Nodes**, **Kubernetes / Kubelet**. + +1. **Pod resources** — CPU/memory for your **StatefulSet** (or Rollout) pods in the target namespace. +2. **Namespace** — Which **Pods** in `default` (or your app namespace) use the **most / least** CPU? +3. **Nodes** — Memory (% and absolute), CPU cores (node exporter / cluster views). +4. **Kubelet** — Pods/containers managed (Kubelet dashboards / summary panels). +5. **Network** — Traffic for Pods in your namespace (where available in your stack version). +6. **Alerts** — Count **firing** alerts in Grafana; cross-check in Alertmanager **/#/alerts**. + +## 4. Init containers (Task 3) + +The chart runs **two optional init containers** when `initContainers.enabled: true` (default): + +1. **`wait-for-dns`** — loops until `nslookup` succeeds for **`kubernetes.default.svc.cluster.local`** (cluster DNS up). +2. **`init-download`** — **`wget`** saves **`http://example.com`** to **`index.html`** on a shared **`emptyDir`** (`init-workdir`). + +The main app container mounts that volume **read-only** at **`/init`** (file path **`/init/index.html`**). 
+ +**Verify:** + +```bash +kubectl get pods -w +kubectl logs <pod> -c wait-for-dns +kubectl logs <pod> -c init-download +kubectl exec -it <pod> -c devops-python -- head -5 /init/index.html +``` + +Disable or tweak in **`values.yaml`** under **`initContainers.*`**. + +## 5. Custom metrics & ServiceMonitor (bonus) + +The Python app already exposes **`GET /metrics`** (`prometheus_client` from earlier labs). + +Enable the chart’s **ServiceMonitor** (requires **Prometheus Operator** CRDs from kube-prometheus-stack): + +```bash +helm upgrade --install myapp ./k8s/devops-python -n <namespace> \ + --set serviceMonitor.enabled=true \ + --set serviceMonitor.releaseLabel=monitoring +``` + +Or **`-f k8s/devops-python/values-servicemonitor.yaml`** (and ensure **`releaseLabel`** matches your kube-prometheus-stack Helm **release name**). + +The `ServiceMonitor` selects the app **Service** by **`app.kubernetes.io/name`** / **`instance`** and scrapes **`port: http`**, **`path: /metrics`**. + +**Prometheus UI:** + +```bash +kubectl port-forward svc/monitoring-kube-prometheus-prometheus -n monitoring 9090:9090 +``` + +Check **Status → Targets** for your job; run **Explore** / **Graph** with e.g. `http_requests_total`. + +## 6. References + +- [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) +- [Init containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) +- [ServiceMonitor](https://prometheus-operator.dev/docs/developer-guide/api/) diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000000..89fd52104e --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,170 @@ +# Lab 9 — Kubernetes Fundamentals (Manifests + Operations) + +This folder contains the Kubernetes manifests to deploy the course Python app to a local Kubernetes cluster (minikube or kind) using best practices: replicas, resource requests/limits, probes, and rolling updates. 
+ +--- + +## Architecture Overview + +- **Deployment**: `devops-python` (default namespace) + - **Replicas**: 3 (scale to 5 for Task 4) + - **Container port**: 5000 + - **Health endpoint**: `/health` + - **Resources**: requests/limits set + - **Strategy**: rolling update with `maxUnavailable: 0` +- **Service**: `devops-python` (NodePort) + - Service port **80** → Pod port **5000** + - NodePort: **30080** + +Traffic flow: + +`Client → NodeIP:30080 → Service(devops-python:80) → Pods(devops-python:5000)` + +--- + +## Manifest Files + +- **`deployment.yml`** + - `replicas: 3` + - `readinessProbe`, `livenessProbe`, `startupProbe` on `/health` + - resource requests/limits + - securityContext: `runAsNonRoot`, drop caps, `readOnlyRootFilesystem` +- **`service.yml`** + - NodePort service exposing the app locally on port `30080` + +--- + +## Task 1 — Local Kubernetes Setup (Evidence) + +Pick one: +- **minikube** (recommended for local dev UX) +- **kind** (lightweight; good for CI-like environments) + +Evidence commands (paste outputs into your report): + +```bash +kubectl cluster-info +kubectl get nodes -o wide +kubectl get namespaces +``` + +--- + +## Task 2 & 3 — Deploy and Expose the App + +Apply manifests: + +```bash +kubectl apply -f k8s/deployment.yml +kubectl apply -f k8s/service.yml +``` + +Verify: + +```bash +kubectl get deployments +kubectl get pods -o wide +kubectl get svc -o wide +kubectl describe deployment devops-python +kubectl get endpoints devops-python +``` + +### Accessing the service + +#### Option A: minikube + +```bash +minikube service devops-python --url +curl "$(minikube service devops-python --url)/" +curl "$(minikube service devops-python --url)/health" +``` + +#### Option B: kind / generic cluster (port-forward) + +```bash +kubectl port-forward service/devops-python 8080:80 +curl http://localhost:8080/ +curl http://localhost:8080/health +``` + +--- + +## Task 4 — Scaling, Rolling Updates, Rollback + +### Scaling to 5 replicas + +**Declarative** 
(edit `replicas` in `k8s/deployment.yml` then apply) OR do quick imperative scaling: + +```bash +kubectl scale deployment/devops-python --replicas=5 +kubectl get pods -w +kubectl rollout status deployment/devops-python +``` + +Capture evidence: + +```bash +kubectl get deployment devops-python -o wide +kubectl get pods -l app=devops-python +``` + +### Rolling update + +Update the image (example: change tag): + +```bash +kubectl set image deployment/devops-python app=jambulancia/devops-info-service:latest +kubectl rollout status deployment/devops-python +kubectl rollout history deployment/devops-python +``` + +Watch pods during rollout: + +```bash +kubectl get pods -l app=devops-python -w +``` + +### Rollback + +```bash +kubectl rollout undo deployment/devops-python +kubectl rollout status deployment/devops-python +kubectl rollout history deployment/devops-python +``` + +--- + +## Production Considerations + +- **Health checks**: `startupProbe` avoids killing slow-start containers; `readinessProbe` prevents traffic until ready; `livenessProbe` restarts unhealthy containers. +- **Resources**: requests ensure scheduling; limits prevent noisy-neighbor issues. +- **Security**: non-root, runtime default seccomp, drop Linux capabilities, read-only filesystem. +- **Improvements for real prod**: + - Use **Ingress / Gateway API** instead of NodePort + - Add **HPA** (autoscaling) based on CPU/RPS + - Add **PodDisruptionBudget** and anti-affinity + - Use ConfigMaps/Secrets for config + - Add metrics/logging/alerting (Labs 7–8 already) + +--- + +## Challenges & Solutions (Fill In) + +Common debugging commands: + +```bash +kubectl describe pod <pod-name> +kubectl logs <pod-name> +kubectl get events --sort-by=.metadata.creationTimestamp +``` + +Write what you hit (image pull, probes, port mismatch, etc.) and how you fixed it. 
+ +--- + +## Lab 11 — Secrets & Vault (Helm chart) + +The Helm chart under `k8s/devops-python/` supports Kubernetes Secrets (`templates/secrets.yaml`), a dedicated ServiceAccount for Vault Kubernetes auth, and optional Vault Agent Injector annotations. + +Documentation and commands: **`k8s/SECRETS.md`**. + diff --git a/k8s/ROLLOUTS.md b/k8s/ROLLOUTS.md new file mode 100644 index 0000000000..cbbcb38363 --- /dev/null +++ b/k8s/ROLLOUTS.md @@ -0,0 +1,133 @@ +# Lab 14 — Argo Rollouts (progressive delivery) + +The `k8s/devops-python` chart uses **`workload.kind: rollout`** (default) for an Argo **Rollout** with **canary** or **blue-green** strategies. For **StatefulSet** + per-pod PVC (Lab 15), set **`workload.kind: statefulSet`** — see `k8s/STATEFULSET.md`. + +## Prerequisites + +- Kubernetes cluster +- [Argo Rollouts](https://argoproj.github.io/argo-rollouts/) installed in the cluster +- (Recommended) [kubectl argo rollouts](https://argoproj.github.io/argo-rollouts/installation/#kubectl-plugin-installation) plugin + +## 1. Install the Rollouts controller and dashboard (Task 1) + +```bash +chmod +x k8s/rollouts/install-argo-rollouts.sh +./k8s/rollouts/install-argo-rollouts.sh +``` + +Or use the [upstream manifests](https://argoproj.github.io/argo-rollouts/installation/) directly. + +**Verify:** + +```bash +kubectl get pods -n argo-rollouts +kubectl argo rollouts version # with plugin installed +``` + +**Dashboard (Task 1):** after the script (or `kubectl apply` of `dashboard-install.yaml`): + +```bash +kubectl port-forward svc/argo-rollouts-dashboard -n argo-rollouts 3100:3100 +``` + +Open **http://localhost:3100** and open your namespace / Rollout from the UI. + +**Rollout vs Deployment:** a **Rollout** is API-compatible in `spec.template` and `spec.selector` with a Deployment, but `spec.strategy` is replaced with **canary** or **blueGreen** options that support traffic weights, pauses, analysis, and (with ingress/service mesh) fine-grained routing. + +## 2. 
Install the app (Helm) + +**Canary (default `values.yaml`):** + +```bash +helm upgrade --install myapp ./k8s/devops-python -n <namespace> --create-namespace +``` + +**Blue-green:** + +```bash +helm upgrade --install myapp ./k8s/devops-python -n <namespace> -f k8s/devops-python/values-bluegreen.yaml +``` + +The main Service (unchanged) is the **active** production Service. When using blue-green, a second **preview** Service is created: `<release>-devops-python-preview` (e.g. for smoke tests on the new stack before `promote`). + +## 3. Canary strategy (Task 2) + +Configured in `templates/rollout.yaml` and `values.yaml` (`rollout.strategy: canary`). + +**Steps (lab spec):** + +| Step | Action | +|------|--------| +| 1 | `setWeight: 20` | +| 2 | Optional **analysis** (see bonus) if `rollout.analysis.enabled: true` | +| 3 | `pause: {}` — **manual** promotion (`kubectl argo rollouts promote ...`) | +| 4 | `setWeight: 40` → `pause: 30s` | +| 5 | `setWeight: 60` → `pause: 30s` | +| 6 | `setWeight: 80` → `pause: 30s` | +| 7 | `setWeight: 100` | + +**CLI:** + +```bash +kubectl argo rollouts get rollout <rollout> -n <namespace> -w +kubectl argo rollouts promote <rollout> -n <namespace> +kubectl argo rollouts abort <rollout> -n <namespace> +kubectl argo rollouts retry rollout <rollout> -n <namespace> +``` + +Trigger an upgrade (e.g. `helm upgrade ... --set image.tag=newtag`), then use the **dashboard** or `kubectl` to watch ReplicaSet weights and follow pauses. + +**Replicas:** canary `setWeight` is a **percentage of desired pods** going to the new version. For meaningful steps, use **at least 2** replicas in the target environment (see `values-dev.yaml`). + +**Note:** without a service mesh or ingress integration, “traffic split” is approximated by **replica count** of stable vs canary; for exact HTTP splits you would add a traffic router (NGINX, Istio, etc.) per the [Argo Rollouts traffic management](https://argoproj.github.io/argo-rollouts/features/traffic-management/) docs. + +**Rollback / abort:** `abort` stops the canary; stable ReplicaSet should serve again while you fix Git/Helm. 
+ +## 4. Blue-green (Task 3) + +Use `-f k8s/devops-python/values-bluegreen.yaml` or set in your own values file: + +- `rollout.strategy: blueGreen` +- `rollout.blueGreen.activeService` / `previewService` are implied in the template as `<fullname>` and `<fullname>-preview` (the chart wires them to the two Services) +- `autoPromotionEnabled: false` for **manual** promotion to production +- `service.previewNodePort: 30081` when `service.type: NodePort` (active uses `30080` by default) + +**Flow (conceptual):** + +1. Baseline runs as **active** (blue). +2. Change app version → Rollout brings up **green**; **preview** Service targets the new ReplicaSet. +3. Test: `kubectl port-forward svc/<fullname>-preview ...` and hit `/` or `/health`. +4. **Promote:** `kubectl argo rollouts promote <rollout> -n <namespace>` — active switches to green; optional delay before scale-down (`scaleDownDelaySeconds`). + +**Blue-green vs canary (Task 4):** + +| | Canary | Blue-green | +|---|--------|------------| +| Risk | Gradual; mixed version traffic at weights | All-or-nothing cutover when promoted | +| Resources | Often fewer extra pods (weighted mix) | Often **two** full stacks while preview exists | +| Best when | You want % rollout + pauses for metrics | You want full new stack tested behind preview Service | + +**Practical pick:** canary for gradual, metric-gated release; blue-green for full UAT of the next stack before a single **promote** event. + +## 5. Bonus — automated analysis (optional) + +- Set `rollout.analysis.enabled: true` in values (with `rollout.strategy: canary`). + +The chart renders an **AnalysisTemplate** (`templates/analysistemplate.yaml`) that performs a **web** check against `http://<service>.<namespace>.svc:<port>/health` and expects JSON `status` to equal `healthy` (see `app_python` `/health`). + +**Canary** steps include an `analysis` step after `setWeight: 20` when analysis is enabled. If analysis fails, the Rollout can **abort** the promotion per controller settings. 
+ +**Prometheus**-based analysis is possible in larger setups (cluster Prometheus URL + PromQL in `AnalysisTemplate`); this repo uses the **web** provider for a minimal, dependency-free path. + +## 6. Further reading + +- [Rollout spec](https://argoproj.github.io/argo-rollouts/features/specification/) +- [Canary](https://argoproj.github.io/argo-rollouts/features/canary/) / [Blue-Green](https://argoproj.github.io/argo-rollouts/features/bluegreen/) +- [Analysis & progressive delivery](https://argoproj.github.io/argo-rollouts/features/analysis/) + +## 7. Screenshot / evidence checklist (for your report) + +- Argo Rollouts **dashboard** showing a Rollout and its steps +- `kubectl argo rollouts get rollout ...` / `get pods` during a canary +- (Blue-green) port-forward to **active** vs **preview** and note versions +- (Bonus) `AnalysisRun` success/failure in the UI or `kubectl get analysisrun` diff --git a/k8s/SECRETS.md b/k8s/SECRETS.md new file mode 100644 index 0000000000..c1f9a63bea --- /dev/null +++ b/k8s/SECRETS.md @@ -0,0 +1,186 @@ +# Lab 11 — Kubernetes Secrets & HashiCorp Vault + +This document describes secret management for the `devops-python` Helm chart (`k8s/devops-python/`) and how to run the lab tasks locally. + +--- + +## 1. Kubernetes Secrets (Task 1) + +### Create a Secret imperatively + +```bash +kubectl create secret generic app-credentials \ + --from-literal=username=demo-user \ + --from-literal=password=demo-pass \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +### View and decode + +```bash +kubectl get secret app-credentials -o yaml +# Copy the base64 value and decode: +echo "<base64-value>" | base64 -d +``` + +### Encoding vs encryption + +- **Base64** in Secret objects is **encoding**, not encryption. Anyone with RBAC permission to read `Secret` resources can decode values. +- **Encryption at rest** for etcd is a **cluster-level** feature (see [Encrypting Secret Data at Rest](https://kubernetes.io/docs/tasks/administer-cluster/encrypt-data/)). 
It is **not** enabled by default on all clusters; check your cluster docs. + +**Production:** restrict Secret access with RBAC, enable etcd encryption, use an external secret manager (Vault, cloud SM) for sensitive material. + +--- + +## 2. Helm-managed Secrets (Task 2) + +### Chart additions + +| File | Purpose | +|------|---------| +| `templates/secrets.yaml` | `Secret` with `stringData` from `.Values.secrets.stringData` | +| `templates/serviceaccount.yaml` | ServiceAccount for Vault Kubernetes auth | +| `templates/rollout.yaml` or `templates/statefulset.yaml` | `envFrom.secretRef` when secrets enabled; Vault pod annotations when enabled (workload toggled by `.Values.workload.kind`) | +| `templates/_helpers.tpl` | `devops-python.secretName`, `devops-python.serviceAccountName`, `devops-python.envVars` (DRY) | + +### Default values (placeholders only — do not commit real passwords) + +See `values.yaml`: + +- `secrets.enabled`, `secrets.create`, `secrets.stringData` +- Use `secrets.existingSecretName` to reference a pre-created Secret instead of chart-managed one. + +### Install without committing real secrets + +```bash +helm upgrade --install devops ./k8s/devops-python \ + --set secrets.stringData.username=prod-user \ + --set secrets.stringData.password=prod-pass +``` + +### Verify injection (keys become env vars: `USERNAME`, `PASSWORD`) + +```bash +POD=$(kubectl get pods -l app.kubernetes.io/instance=devops -o jsonpath='{.items[0].metadata.name}') +kubectl exec -it "$POD" -- env | grep -E '^(USERNAME|PASSWORD)=' +``` + +`kubectl describe pod` does **not** print secret values (only references). + +### Resource limits + +CPU/memory requests and limits remain in `values.yaml` under `resources` (same as Lab 10). + +--- + +## 3. 
HashiCorp Vault (Task 3) + +### Install Vault (dev mode — learning only) + +```bash +helm repo add hashicorp https://helm.releases.hashicorp.com +helm repo update +helm install vault hashicorp/vault \ + --namespace vault --create-namespace \ + --set server.dev.enabled=true \ + --set injector.enabled=true +kubectl get pods -n vault +``` + +### Configure Vault (run inside `vault-0`) + +Exec and use the Vault CLI (dev mode is auto-unsealed): + +```bash +kubectl exec -n vault vault-0 -- vault secrets enable -path=secret kv-v2 +kubectl exec -n vault vault-0 -- vault kv put secret/myapp/config username="vault-user" password="vault-pass" +``` + +### Kubernetes auth + +Point Vault at your cluster (paths vary; see [Kubernetes auth](https://developer.hashicorp.com/vault/docs/auth/kubernetes)): + +```bash +kubectl exec -n vault vault-0 -- vault auth enable kubernetes +kubectl exec -n vault vault-0 -- sh -c 'vault write auth/kubernetes/config \ + kubernetes_host="https://$KUBERNETES_PORT_443_TCP_ADDR:443" \ + kubernetes_ca_cert=@/var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ + token_reviewer_jwt=@/var/run/secrets/kubernetes.io/serviceaccount/token' +``` + +Create a policy (read KV path) and a role bound to your app’s ServiceAccount: + +```bash +kubectl exec -n vault vault-0 -- sh -c 'vault policy write devops-python - < -- cat /vault/secrets/config +``` + +--- + +## 4. Security Analysis + +| Approach | Pros | Cons | +|----------|------|------| +| **Kubernetes Secrets** | Simple, native, works offline | Base64 only by default; etcd encryption is cluster config; RBAC critical | +| **Vault** | Centralized secrets, policies, rotation, audit | Operational complexity; needs HA for prod | + +**When to use which:** use K8s Secrets for low-sensitivity bootstrap; use Vault (or cloud SM) for credentials, keys, and rotation in production. + +--- + +## 5. 
Bonus — Vault Agent template file + +When `vault.injector.templateEnabled: true`, the chart uses **`files/vault-agent-inject.tpl`** (raw Vault template syntax, not Helm-processed) with `agent-inject-template-config` instead of `agent-inject-secret-config`. + +**Named template (Helm):** `devops-python.envVars` in `_helpers.tpl` keeps non-secret env DRY. + +**Vault Agent refresh:** the sidecar renews leases and re-renders files on secret changes; see [Agent annotations](https://developer.hashicorp.com/vault/docs/platform/k8s/injector/annotations). + +--- + +## Validation + +```bash +helm lint k8s/devops-python +helm template test k8s/devops-python | grep -E 'Secret|envFrom|vault.hashicorp.com' -A2 +``` diff --git a/k8s/STATEFULSET.md b/k8s/STATEFULSET.md new file mode 100644 index 0000000000..07bf07aa43 --- /dev/null +++ b/k8s/STATEFULSET.md @@ -0,0 +1,111 @@ +# Lab 15 — StatefulSets & persistent storage + +This chart can run the Python app as an **Argo Rollout** (Lab 14, default) or a **StatefulSet** (Lab 15) via `workload.kind`. + +## Why StatefulSet? + +StatefulSets give **stable pod names** (`<release>-devops-python-0`, `<release>-devops-python-1`, …), **ordered** start/terminate, and **per-pod** persistent volumes via **`volumeClaimTemplates`**. Use them for workloads that need identity and dedicated disk (databases, Kafka, etc.). For **stateless** HTTP apps and **canary/blue-green**, prefer a **Rollout** or Deployment. + +| | Rollout / Deployment | StatefulSet | +|---|---------------------|-------------| +| Pod names | Random suffix | Stable ordinal | +| Storage | Often one shared PVC or ephemeral | **Per-pod** PVC from templates | +| Scale | Any order | Typically ordered (0→1→2) | +| DNS | Via Service only | **Headless** Service → per-pod A records | + +## Install as StatefulSet + +```bash +helm upgrade --install myapp ./k8s/devops-python -n default --create-namespace \ + -f k8s/devops-python/values-statefulset.yaml +``` + +Or set `workload.kind: statefulSet` in your own values file. 
+ +**Templates involved:** + +- `templates/statefulset.yaml` — `serviceName` points at the headless service +- `templates/service-headless.yaml` — `clusterIP: None`, same selectors as pods +- `templates/service.yaml` — unchanged **client** Service (NodePort/ClusterIP) for load-balanced access + +When `workload.kind` is `statefulSet`, the chart does **not** render the single shared `PersistentVolumeClaim` used by the Rollout path; storage is **only** from **`volumeClaimTemplates`** (name `visits-data`), one PVC per pod. + +## Verify resources + +```bash +kubectl get sts,po,svc,pvc -l app.kubernetes.io/instance=<release> +``` + +Expect PVCs named `visits-data-<fullname>-0`, `visits-data-<fullname>-1`, … + +## Headless DNS + +Pattern: + +`<pod-name>.<headless-service>.<namespace>.svc.cluster.local` + +Example (release `myapp`, name `myapp-devops-python`): + +- Pods: `myapp-devops-python-0`, `myapp-devops-python-1` +- Headless Service: `myapp-devops-python-headless` +- Pod 0 resolves pod 1: + `nslookup myapp-devops-python-1.myapp-devops-python-headless.default.svc.cluster.local` + +`publishNotReadyAddresses: true` allows DNS records even before pods are Ready (use with care in production). + +## Per-pod visit counter (Lab 12 app) + +Each pod mounts **its own** volume at `/data`, so **`GET /visits`** reflects **only that pod’s** file. + +**Example:** + +```bash +kubectl port-forward pod/myapp-devops-python-0 8080:5000 & +kubectl port-forward pod/myapp-devops-python-1 8081:5000 & +curl -s http://localhost:8080/visits +curl -s http://localhost:8081/visits +``` + +Hit `/` several times on each port-forward (or use the main Service and rely on load balancing) to show **different** totals per pod when addressed directly. 
+ +## Persistence after deleting a pod + +```bash +kubectl exec myapp-devops-python-0 -- cat /data/visits +kubectl delete pod myapp-devops-python-0 +# wait for pod 0 to recreate +kubectl exec myapp-devops-python-0 -- cat /data/visits +``` + +The ordinal **0** is reattached to the **same** PVC; the counter file should match the pre-delete value. + +## Bonus — update strategies + +**Partitioned rolling update** (`values-statefulset-partition.yaml`): + +- `updateStrategy.rollingUpdate.partition: N` — during an image/config change, only pods with **ordinal ≥ N** receive the new revision until you lower or remove the partition. + +**OnDelete** (`values-statefulset-ondelete.yaml`): + +- `updateStrategy.type: OnDelete` — the controller does not restart pods automatically; delete a pod manually to apply the new template. Useful for strict operational control; slower and easy to forget. + +Combine with the main StatefulSet values file: + +```bash +helm upgrade --install myapp ./k8s/devops-python \ + -f k8s/devops-python/values-statefulset.yaml \ + -f k8s/devops-python/values-statefulset-partition.yaml +``` + +## References + +- [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) +- [Headless Services](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services) +- [VolumeClaimTemplates](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#volume-claim-templates) + +## Evidence checklist (for your report) + +- `kubectl get sts,po,svc,pvc` output +- DNS lookup from inside a pod +- Different `/visits` (or `/data/visits`) per ordinal +- Same count on pod `*-0` after delete/recreate diff --git a/k8s/argocd/application-dev.yaml b/k8s/argocd/application-dev.yaml new file mode 100644 index 0000000000..5b4204868b --- /dev/null +++ b/k8s/argocd/application-dev.yaml @@ -0,0 +1,26 @@ +# Task 3 — dev: auto-sync + self-heal + prune. 
+apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-python-dev + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/YOUR_GITHUB_USER/YOUR_REPO.git + targetRevision: main + path: k8s/devops-python + helm: + valueFiles: + - values-dev.yaml + destination: + server: https://kubernetes.default.svc + namespace: dev + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/application-prod.yaml b/k8s/argocd/application-prod.yaml new file mode 100644 index 0000000000..66b5c32a6c --- /dev/null +++ b/k8s/argocd/application-prod.yaml @@ -0,0 +1,23 @@ +# Task 3 — prod: manual sync only (no automated block). +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-python-prod + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/YOUR_GITHUB_USER/YOUR_REPO.git + targetRevision: main + path: k8s/devops-python + helm: + valueFiles: + - values-prod.yaml + destination: + server: https://kubernetes.default.svc + namespace: prod + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/application.yaml b/k8s/argocd/application.yaml new file mode 100644 index 0000000000..a36eb84585 --- /dev/null +++ b/k8s/argocd/application.yaml @@ -0,0 +1,26 @@ +# Task 2 — initial single-environment deploy (manual sync). +# Replace repoURL and targetRevision with your fork/branch before applying. +# If you use application-dev.yaml + application-prod.yaml instead, skip this file +# to avoid overlapping intent (this one targets namespace "default"). 
+apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-python + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/YOUR_GITHUB_USER/YOUR_REPO.git + targetRevision: main + path: k8s/devops-python + helm: + valueFiles: + - values.yaml + destination: + server: https://kubernetes.default.svc + namespace: default + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/applicationset.yaml b/k8s/argocd/applicationset.yaml new file mode 100644 index 0000000000..7d300a25fb --- /dev/null +++ b/k8s/argocd/applicationset.yaml @@ -0,0 +1,46 @@ +# Bonus — ApplicationSet (List generator + goTemplate for per-env sync policy). +# Do not apply together with application-dev.yaml / application-prod.yaml (same app names). +# Requires Argo CD with ApplicationSet controller (default in argo-cd Helm install). +apiVersion: argoproj.io/v1alpha1 +kind: ApplicationSet +metadata: + name: devops-python + namespace: argocd +spec: + goTemplate: true + goTemplateOptions: + - missingkey=error + generators: + - list: + elements: + - env: dev + namespace: dev + valuesFile: values-dev.yaml + auto: true + - env: prod + namespace: prod + valuesFile: values-prod.yaml + auto: false + template: + metadata: + name: "devops-python-{{ .env }}" + spec: + project: default + source: + repoURL: https://github.com/YOUR_GITHUB_USER/YOUR_REPO.git + targetRevision: main + path: k8s/devops-python + helm: + valueFiles: + - "{{ .valuesFile }}" + destination: + server: https://kubernetes.default.svc + namespace: "{{ .namespace }}" + syncPolicy: + {{- if .auto }} + automated: + prune: true + selfHeal: true + {{- end }} + syncOptions: + - CreateNamespace=true diff --git a/k8s/argocd/install-argocd.sh b/k8s/argocd/install-argocd.sh new file mode 100755 index 0000000000..eeed14730e --- /dev/null +++ b/k8s/argocd/install-argocd.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Lab 13 — install Argo 
CD into the cluster (Task 1). +# Usage: ./install-argocd.sh +set -euo pipefail + +echo "Adding Argo Helm repo..." +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update + +echo "Creating namespace argocd (if missing)..." +kubectl get namespace argocd >/dev/null 2>&1 || kubectl create namespace argocd + +if helm status argocd -n argocd >/dev/null 2>&1; then + echo "Helm release 'argocd' already exists; skipping install." + echo "To upgrade the chart: helm upgrade argocd argo/argo-cd --namespace argocd --reuse-values" +else + echo "Installing argo-cd..." + helm install argocd argo/argo-cd --namespace argocd +fi + +echo "Waiting for argocd-server to be ready (up to 180s)..." +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s + +echo "" +echo "Next steps:" +echo " UI: kubectl port-forward svc/argocd-server -n argocd 8080:443" +echo " Open https://localhost:8080 (accept self-signed cert); user: admin" +echo " Pass: kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d; echo" +echo " CLI: argocd login localhost:8080 --insecure" +echo "" diff --git a/k8s/argocd/namespaces.yaml b/k8s/argocd/namespaces.yaml new file mode 100644 index 0000000000..7bc76cef31 --- /dev/null +++ b/k8s/argocd/namespaces.yaml @@ -0,0 +1,16 @@ +# Optional: apply before Argo CD apps, or rely on sync option CreateNamespace=true. 
+apiVersion: v1 +kind: Namespace +metadata: + name: dev + labels: + app.kubernetes.io/part-of: devops-python + environment: dev +--- +apiVersion: v1 +kind: Namespace +metadata: + name: prod + labels: + app.kubernetes.io/part-of: devops-python + environment: prod diff --git a/k8s/deployment.yml b/k8s/deployment.yml new file mode 100644 index 0000000000..d0bebde5db --- /dev/null +++ b/k8s/deployment.yml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-python + labels: + app: devops-python + lab: "09" +spec: + replicas: 3 + revisionHistoryLimit: 5 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: devops-python + template: + metadata: + labels: + app: devops-python + lab: "09" + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: app + image: jambulancia/devops-info-service:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 5000 + protocol: TCP + env: + - name: HOST + value: "0.0.0.0" + - name: PORT + value: "5000" + - name: LOG_FORMAT + value: "json" + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + startupProbe: + httpGet: + path: /health + port: http + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 20 + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL diff --git a/k8s/devops-python/Chart.yaml b/k8s/devops-python/Chart.yaml new file mode 100644 index 0000000000..e99b47063b --- /dev/null +++ b/k8s/devops-python/Chart.yaml @@ -0,0 +1,13 @@ 
+apiVersion: v2 +name: devops-python +description: DevOps course Python app — Helm chart; Rollout / StatefulSet; Lab 16 init + ServiceMonitor. +type: application +version: 0.4.0 +appVersion: "1.0.0" + +keywords: + - devops + - python + - flask + - kubernetes + diff --git a/k8s/devops-python/files/config.json b/k8s/devops-python/files/config.json new file mode 100644 index 0000000000..45327a80fa --- /dev/null +++ b/k8s/devops-python/files/config.json @@ -0,0 +1,8 @@ +{ + "appName": "devops-info-service", + "environment": "dev", + "features": { + "visitsCounter": true, + "debugEndpoints": false + } +} diff --git a/k8s/devops-python/files/vault-agent-inject.tpl b/k8s/devops-python/files/vault-agent-inject.tpl new file mode 100644 index 0000000000..84619d7320 --- /dev/null +++ b/k8s/devops-python/files/vault-agent-inject.tpl @@ -0,0 +1,4 @@ +{{- with secret "secret/data/myapp/config" -}} +USERNAME={{ .Data.data.username }} +PASSWORD={{ .Data.data.password }} +{{- end -}} diff --git a/k8s/devops-python/templates/NOTES.txt b/k8s/devops-python/templates/NOTES.txt new file mode 100644 index 0000000000..fea8c01f4e --- /dev/null +++ b/k8s/devops-python/templates/NOTES.txt @@ -0,0 +1,33 @@ +1) Get the application URL: + +{{- if eq .Values.service.type "NodePort" }} + export NODE_PORT={{ .Values.service.nodePort }} + echo "NodePort: ${NODE_PORT}" + +Minikube: + minikube service {{ include "devops-python.fullname" . }} --url + +Generic (port-forward): + kubectl port-forward service/{{ include "devops-python.fullname" . }} 8080:{{ .Values.service.port }} + curl http://localhost:8080/health +{{- else }} + kubectl get svc {{ include "devops-python.fullname" . }} -o wide +{{- end }} + +2) Workload: {{ .Values.workload.kind }} (set workload.kind: statefulSet for Lab 15 — see values-statefulset.yaml + k8s/STATEFULSET.md). 
+{{- if eq .Values.workload.kind "rollout" }} + Argo Rollouts: ./k8s/rollouts/install-argo-rollouts.sh + kubectl argo rollouts get rollout {{ include "devops-python.fullname" . }} -n {{ .Release.Namespace }} -w + kubectl argo rollouts promote {{ include "devops-python.fullname" . }} -n {{ .Release.Namespace }} + Blue-green preview (if strategy=blueGreen): {{ include "devops-python.fullname" . }}-preview +{{- else }} + Headless Service (per-pod DNS): {{ include "devops-python.fullname" . }}-headless + kubectl get pods,pvc -n {{ .Release.Namespace }} -l app.kubernetes.io/instance={{ .Release.Name }} +{{- end }} + +3) Lab 16 — Init file from init container: `/init/{{ .Values.initContainers.download.filename }}` (default `index.html`). Monitoring: `k8s/MONITORING.md`, optional ServiceMonitor: `.Values.serviceMonitor.enabled` + +4) Useful Helm commands: + helm get manifest {{ .Release.Name }} + helm get values {{ .Release.Name }} + helm history {{ .Release.Name }} diff --git a/k8s/devops-python/templates/_helpers.tpl b/k8s/devops-python/templates/_helpers.tpl new file mode 100644 index 0000000000..1b40853058 --- /dev/null +++ b/k8s/devops-python/templates/_helpers.tpl @@ -0,0 +1,144 @@ +{{/* +Chart name +*/}} +{{- define "devops-python.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Full name +*/}} +{{- define "devops-python.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "devops-python.selectorLabels" -}} +app.kubernetes.io/name: {{ include "devops-python.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "devops-python.labels" -}} +helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} +{{ include "devops-python.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Secret resource name (Helm-managed or external) +*/}} +{{- define "devops-python.secretName" -}} +{{- if .Values.secrets.existingSecretName -}} +{{- .Values.secrets.existingSecretName -}} +{{- else -}} +{{- printf "%s-app-credentials" (include "devops-python.fullname" .) | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Service account name +*/}} +{{- define "devops-python.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} +{{- default (include "devops-python.fullname" .) .Values.serviceAccount.name -}} +{{- else -}} +{{- default "default" .Values.serviceAccount.name -}} +{{- end -}} +{{- end -}} + +{{/* +Non-secret container env (DRY) — Lab 11 bonus +*/}} +{{- define "devops-python.envVars" -}} +- name: HOST + value: {{ .Values.env.HOST | quote }} +- name: PORT + value: {{ .Values.env.PORT | quote }} +- name: LOG_FORMAT + value: {{ .Values.env.LOG_FORMAT | quote }} +- name: VISITS_DATA_PATH + value: {{ .Values.env.VISITS_DATA_PATH | quote }} +{{- end -}} + +{{/* +Stable checksum input for env ConfigMap (Lab 12 bonus — pod restart on change) +*/}} +{{- define "devops-python.configEnvChecksum" -}} +{{- printf "%s|%s|%v" .Values.config.environment .Values.config.logLevel .Values.config.featureDebug -}} +{{- end -}} + +{{- define "devops-python.analysisTemplateName" -}} +{{- printf "%s-health" (include "devops-python.fullname" .) -}} +{{- end -}} + +{{/* +Lab 16 — init containers (download + wait-for-DNS). 
Included when .Values.initContainers.enabled +*/}} +{{- define "devops-python.initContainers" -}} +{{- if .Values.initContainers.waitForService.enabled }} +- name: wait-for-dns + image: {{ .Values.initContainers.waitForService.image | quote }} + command: + - sh + - -c + - | + set -e + until nslookup "$WAIT_HOST" >/dev/null 2>&1; do + echo "waiting for DNS: $WAIT_HOST" + sleep 2 + done + echo "DNS resolved for $WAIT_HOST" + env: + - name: WAIT_HOST + value: {{ .Values.initContainers.waitForService.host | quote }} + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: false + runAsUser: 0 + capabilities: + drop: + - ALL + resources: + requests: + cpu: 10m + memory: 16Mi + limits: + memory: 32Mi +{{- end }} +{{- if .Values.initContainers.download.enabled }} +- name: init-download + image: {{ .Values.initContainers.download.image | quote }} + command: + - sh + - -c + - wget -q -O /work-dir/{{ .Values.initContainers.download.filename }} {{ .Values.initContainers.download.url | quote }} + volumeMounts: + - name: init-workdir + mountPath: /work-dir + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: false + runAsUser: 0 + capabilities: + drop: + - ALL + resources: + requests: + cpu: 10m + memory: 32Mi + limits: + memory: 64Mi +{{- end }} +{{- end }} + diff --git a/k8s/devops-python/templates/analysistemplate.yaml b/k8s/devops-python/templates/analysistemplate.yaml new file mode 100644 index 0000000000..0f38be5f52 --- /dev/null +++ b/k8s/devops-python/templates/analysistemplate.yaml @@ -0,0 +1,19 @@ +{{- if and (eq .Values.workload.kind "rollout") (eq .Values.rollout.strategy "canary") .Values.rollout.analysis.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: AnalysisTemplate +metadata: + name: {{ include "devops-python.analysisTemplateName" . }} + labels: + {{- include "devops-python.labels" . | nindent 4 }} +spec: + metrics: + - name: health-status + provider: + web: + url: http://{{ include "devops-python.fullname" . 
}}.{{ .Release.Namespace }}.svc:{{ .Values.service.port }}{{ .Values.rollout.analysis.healthPath }} + jsonPath: "{$.status}" + successCondition: result == "healthy" + interval: 10s + count: 3 + failureLimit: 1 +{{- end }} diff --git a/k8s/devops-python/templates/configmap.yaml b/k8s/devops-python/templates/configmap.yaml new file mode 100644 index 0000000000..71640665f3 --- /dev/null +++ b/k8s/devops-python/templates/configmap.yaml @@ -0,0 +1,22 @@ +{{- if .Values.config.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "devops-python.fullname" . }}-file + labels: + {{- include "devops-python.labels" . | nindent 4 }} +data: + config.json: | +{{ .Files.Get "files/config.json" | nindent 4 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "devops-python.fullname" . }}-env + labels: + {{- include "devops-python.labels" . | nindent 4 }} +data: + APP_CONFIG_ENV: {{ .Values.config.environment | quote }} + LOG_LEVEL: {{ .Values.config.logLevel | quote }} + FEATURE_DEBUG: {{ .Values.config.featureDebug | quote }} +{{- end }} diff --git a/k8s/devops-python/templates/hooks/post-install-job.yaml b/k8s/devops-python/templates/hooks/post-install-job.yaml new file mode 100644 index 0000000000..17b0f9c6b5 --- /dev/null +++ b/k8s/devops-python/templates/hooks/post-install-job.yaml @@ -0,0 +1,25 @@ +{{- if and .Values.hooks.enabled .Values.hooks.postInstall }} +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ include "devops-python.fullname" . }}-post-install" + labels: + {{- include "devops-python.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "{{ .Values.hooks.postInstall.weight }}" + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + template: + metadata: + labels: + {{- include "devops-python.selectorLabels" . 
| nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: post-install + image: "{{ .Values.hooks.postInstall.image }}" + command: + {{- toYaml .Values.hooks.postInstall.command | nindent 12 }} +{{- end }} + diff --git a/k8s/devops-python/templates/hooks/pre-install-job.yaml b/k8s/devops-python/templates/hooks/pre-install-job.yaml new file mode 100644 index 0000000000..be4b0aebc4 --- /dev/null +++ b/k8s/devops-python/templates/hooks/pre-install-job.yaml @@ -0,0 +1,25 @@ +{{- if and .Values.hooks.enabled .Values.hooks.preInstall }} +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ include "devops-python.fullname" . }}-pre-install" + labels: + {{- include "devops-python.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-install + "helm.sh/hook-weight": "{{ .Values.hooks.preInstall.weight }}" + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + template: + metadata: + labels: + {{- include "devops-python.selectorLabels" . | nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: pre-install + image: "{{ .Values.hooks.preInstall.image }}" + command: + {{- toYaml .Values.hooks.preInstall.command | nindent 12 }} +{{- end }} + diff --git a/k8s/devops-python/templates/pvc.yaml b/k8s/devops-python/templates/pvc.yaml new file mode 100644 index 0000000000..39c35cb8f6 --- /dev/null +++ b/k8s/devops-python/templates/pvc.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.persistence.enabled (eq .Values.workload.kind "rollout") }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "devops-python.fullname" . }}-data + labels: + {{- include "devops-python.labels" . 
| nindent 4 }} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.persistence.size }} + {{- if .Values.persistence.storageClass }} + storageClassName: {{ .Values.persistence.storageClass | quote }} + {{- end }} +{{- end }} diff --git a/k8s/devops-python/templates/rollout.yaml b/k8s/devops-python/templates/rollout.yaml new file mode 100644 index 0000000000..e19f9b2224 --- /dev/null +++ b/k8s/devops-python/templates/rollout.yaml @@ -0,0 +1,164 @@ +{{- if eq .Values.workload.kind "rollout" }} +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: {{ include "devops-python.fullname" . }} + labels: + {{- include "devops-python.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + revisionHistoryLimit: 5 + selector: + matchLabels: + {{- include "devops-python.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-python.selectorLabels" . | nindent 8 }} + {{- if or (and .Values.configMapChecksum.enabled .Values.config.enabled) .Values.vault.injector.enabled }} + annotations: + {{- if and .Values.configMapChecksum.enabled .Values.config.enabled }} + checksum/config-file: {{ .Files.Get "files/config.json" | sha256sum }} + checksum/config-env: {{ include "devops-python.configEnvChecksum" . | sha256sum }} + {{- end }} + {{- if .Values.vault.injector.enabled }} + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: {{ .Values.vault.role | quote }} + {{- if .Values.vault.injector.templateEnabled }} + vault.hashicorp.com/agent-inject-template-config: | +{{ .Files.Get "files/vault-agent-inject.tpl" | nindent 10 }} + {{- else }} + vault.hashicorp.com/agent-inject-secret-config: {{ .Values.vault.secretPath | quote }} + {{- end }} + {{- range $k, $v := .Values.vault.injector.extraAnnotations }} + {{ $k }}: {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + spec: + serviceAccountName: {{ include "devops-python.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.securityContext.pod | nindent 8 }} + {{- if .Values.initContainers.enabled }} + initContainers: + {{- include "devops-python.initContainers" . | nindent 8 }} + {{- end }} + volumes: + {{- if .Values.initContainers.enabled }} + - name: init-workdir + emptyDir: {} + {{- end }} + {{- if .Values.config.enabled }} + - name: app-config + configMap: + name: {{ include "devops-python.fullname" . }}-file + {{- end }} + {{- if .Values.persistence.enabled }} + - name: visits-data + persistentVolumeClaim: + claimName: {{ include "devops-python.fullname" . }}-data + {{- end }} + - name: tmp + emptyDir: {} + containers: + - name: {{ include "devops-python.name" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.containerPort }} + protocol: TCP + env: + {{- include "devops-python.envVars" . | nindent 12 }} + {{- $needEnvFrom := or (and .Values.config.enabled .Values.config.injectEnv) (and .Values.secrets.enabled (or .Values.secrets.create .Values.secrets.existingSecretName)) }} + {{- if $needEnvFrom }} + envFrom: + {{- if and .Values.config.enabled .Values.config.injectEnv }} + - configMapRef: + name: {{ include "devops-python.fullname" . }}-env + {{- end }} + {{- if and .Values.secrets.enabled (or .Values.secrets.create .Values.secrets.existingSecretName) }} + - secretRef: + name: {{ include "devops-python.secretName" . 
}} + {{- end }} + {{- end }} + volumeMounts: + {{- if .Values.config.enabled }} + - name: app-config + mountPath: /config + readOnly: true + {{- end }} + {{- if .Values.persistence.enabled }} + - name: visits-data + mountPath: /data + {{- end }} + {{- if .Values.initContainers.enabled }} + - name: init-workdir + mountPath: /init + readOnly: true + {{- end }} + - name: tmp + mountPath: /tmp + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- if .Values.probes.readiness.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.probes.readiness.path }} + port: http + initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.readiness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.readiness.failureThreshold }} + {{- end }} + {{- if .Values.probes.liveness.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.probes.liveness.path }} + port: http + initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.liveness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.liveness.failureThreshold }} + {{- end }} + {{- if .Values.probes.startup.enabled }} + startupProbe: + httpGet: + path: {{ .Values.probes.startup.path }} + port: http + initialDelaySeconds: {{ .Values.probes.startup.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.startup.periodSeconds }} + timeoutSeconds: {{ .Values.probes.startup.timeoutSeconds }} + failureThreshold: {{ .Values.probes.startup.failureThreshold }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext.container | nindent 12 }} + strategy: + {{- if eq .Values.rollout.strategy "blueGreen" }} + blueGreen: + activeService: {{ include "devops-python.fullname" . }} + previewService: {{ include "devops-python.fullname" . 
}}-preview + autoPromotionEnabled: {{ .Values.rollout.blueGreen.autoPromotionEnabled }} + scaleDownDelaySeconds: {{ .Values.rollout.blueGreen.scaleDownDelaySeconds }} + previewReplicaCount: {{ .Values.rollout.blueGreen.previewReplicaCount }} + {{- else }} + canary: + maxSurge: {{ .Values.rollout.canary.maxSurge | quote }} + maxUnavailable: {{ .Values.rollout.canary.maxUnavailable | quote }} + steps: + - setWeight: 20 + {{- if .Values.rollout.analysis.enabled }} + - analysis: + templates: + - templateName: {{ include "devops-python.analysisTemplateName" . }} + {{- end }} + - pause: {} + - setWeight: 40 + - pause: { duration: 30s } + - setWeight: 60 + - pause: { duration: 30s } + - setWeight: 80 + - pause: { duration: 30s } + - setWeight: 100 + {{- end }} +{{- end }} diff --git a/k8s/devops-python/templates/secrets.yaml b/k8s/devops-python/templates/secrets.yaml new file mode 100644 index 0000000000..261f1e320a --- /dev/null +++ b/k8s/devops-python/templates/secrets.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.secrets.enabled .Values.secrets.create }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "devops-python.secretName" . }} + labels: + {{- include "devops-python.labels" . | nindent 4 }} +type: Opaque +stringData: + {{- range $k, $v := .Values.secrets.stringData }} + {{ $k }}: {{ $v | quote }} + {{- end }} +{{- end }} diff --git a/k8s/devops-python/templates/service-headless.yaml b/k8s/devops-python/templates/service-headless.yaml new file mode 100644 index 0000000000..e33177909b --- /dev/null +++ b/k8s/devops-python/templates/service-headless.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.service.enabled (eq .Values.workload.kind "statefulSet") }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-python.fullname" . }}-headless + labels: + {{- include "devops-python.labels" . 
| nindent 4 }} + app.kubernetes.io/component: headless +spec: + clusterIP: None + publishNotReadyAddresses: true + selector: + {{- include "devops-python.selectorLabels" . | nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} +{{- end }} diff --git a/k8s/devops-python/templates/service-preview.yaml b/k8s/devops-python/templates/service-preview.yaml new file mode 100644 index 0000000000..03903fe651 --- /dev/null +++ b/k8s/devops-python/templates/service-preview.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.service.enabled (eq .Values.workload.kind "rollout") (eq .Values.rollout.strategy "blueGreen") }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-python.fullname" . }}-preview + labels: + {{- include "devops-python.labels" . | nindent 4 }} + app.kubernetes.io/component: preview +spec: + type: {{ .Values.service.type }} + selector: + {{- include "devops-python.selectorLabels" . | nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + {{- if and (eq .Values.service.type "NodePort") .Values.service.previewNodePort }} + nodePort: {{ .Values.service.previewNodePort }} + {{- end }} +{{- end }} diff --git a/k8s/devops-python/templates/service.yaml b/k8s/devops-python/templates/service.yaml new file mode 100644 index 0000000000..74bea9bbf6 --- /dev/null +++ b/k8s/devops-python/templates/service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-python.fullname" . }} + labels: + {{- include "devops-python.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + selector: + {{- include "devops-python.selectorLabels" . 
| nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + {{- if and (eq .Values.service.type "NodePort") .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} +{{- end }} + diff --git a/k8s/devops-python/templates/serviceaccount.yaml b/k8s/devops-python/templates/serviceaccount.yaml new file mode 100644 index 0000000000..a6d9993ae7 --- /dev/null +++ b/k8s/devops-python/templates/serviceaccount.yaml @@ -0,0 +1,8 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "devops-python.serviceAccountName" . }} + labels: + {{- include "devops-python.labels" . | nindent 4 }} +{{- end }} diff --git a/k8s/devops-python/templates/servicemonitor.yaml b/k8s/devops-python/templates/servicemonitor.yaml new file mode 100644 index 0000000000..ef8951821b --- /dev/null +++ b/k8s/devops-python/templates/servicemonitor.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.serviceMonitor.enabled .Values.service.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "devops-python.fullname" . }} + labels: + release: {{ .Values.serviceMonitor.releaseLabel | quote }} + {{- include "devops-python.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "devops-python.selectorLabels" . 
| nindent 6 }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + endpoints: + - port: http + path: /metrics + interval: {{ .Values.serviceMonitor.interval | quote }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout | quote }} +{{- end }} diff --git a/k8s/devops-python/templates/statefulset.yaml b/k8s/devops-python/templates/statefulset.yaml new file mode 100644 index 0000000000..f493ccdb21 --- /dev/null +++ b/k8s/devops-python/templates/statefulset.yaml @@ -0,0 +1,152 @@ +{{- if eq .Values.workload.kind "statefulSet" }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "devops-python.fullname" . }} + labels: + {{- include "devops-python.labels" . | nindent 4 }} +spec: + serviceName: {{ include "devops-python.fullname" . }}-headless + replicas: {{ .Values.replicaCount }} + podManagementPolicy: {{ .Values.statefulSet.podManagementPolicy }} + updateStrategy: + {{- toYaml .Values.statefulSet.updateStrategy | nindent 4 }} + selector: + matchLabels: + {{- include "devops-python.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-python.selectorLabels" . | nindent 8 }} + {{- if or (and .Values.configMapChecksum.enabled .Values.config.enabled) .Values.vault.injector.enabled }} + annotations: + {{- if and .Values.configMapChecksum.enabled .Values.config.enabled }} + checksum/config-file: {{ .Files.Get "files/config.json" | sha256sum }} + checksum/config-env: {{ include "devops-python.configEnvChecksum" . 
| sha256sum }} + {{- end }} + {{- if .Values.vault.injector.enabled }} + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: {{ .Values.vault.role | quote }} + {{- if .Values.vault.injector.templateEnabled }} + vault.hashicorp.com/agent-inject-template-config: | +{{ .Files.Get "files/vault-agent-inject.tpl" | nindent 10 }} + {{- else }} + vault.hashicorp.com/agent-inject-secret-config: {{ .Values.vault.secretPath | quote }} + {{- end }} + {{- range $k, $v := .Values.vault.injector.extraAnnotations }} + {{ $k }}: {{ $v | quote }} + {{- end }} + {{- end }} + {{- end }} + spec: + serviceAccountName: {{ include "devops-python.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.securityContext.pod | nindent 8 }} + {{- if .Values.initContainers.enabled }} + initContainers: + {{- include "devops-python.initContainers" . | nindent 8 }} + {{- end }} + volumes: + {{- if .Values.initContainers.enabled }} + - name: init-workdir + emptyDir: {} + {{- end }} + {{- if .Values.config.enabled }} + - name: app-config + configMap: + name: {{ include "devops-python.fullname" . }}-file + {{- end }} + {{- if not .Values.persistence.enabled }} + - name: visits-data + emptyDir: {} + {{- end }} + - name: tmp + emptyDir: {} + containers: + - name: {{ include "devops-python.name" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.containerPort }} + protocol: TCP + env: + {{- include "devops-python.envVars" . | nindent 12 }} + {{- $needEnvFrom := or (and .Values.config.enabled .Values.config.injectEnv) (and .Values.secrets.enabled (or .Values.secrets.create .Values.secrets.existingSecretName)) }} + {{- if $needEnvFrom }} + envFrom: + {{- if and .Values.config.enabled .Values.config.injectEnv }} + - configMapRef: + name: {{ include "devops-python.fullname" . 
}}-env + {{- end }} + {{- if and .Values.secrets.enabled (or .Values.secrets.create .Values.secrets.existingSecretName) }} + - secretRef: + name: {{ include "devops-python.secretName" . }} + {{- end }} + {{- end }} + volumeMounts: + {{- if .Values.config.enabled }} + - name: app-config + mountPath: /config + readOnly: true + {{- end }} + - name: visits-data + mountPath: /data + {{- if .Values.initContainers.enabled }} + - name: init-workdir + mountPath: /init + readOnly: true + {{- end }} + - name: tmp + mountPath: /tmp + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- if .Values.probes.readiness.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.probes.readiness.path }} + port: http + initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.readiness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.readiness.failureThreshold }} + {{- end }} + {{- if .Values.probes.liveness.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.probes.liveness.path }} + port: http + initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.liveness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.liveness.failureThreshold }} + {{- end }} + {{- if .Values.probes.startup.enabled }} + startupProbe: + httpGet: + path: {{ .Values.probes.startup.path }} + port: http + initialDelaySeconds: {{ .Values.probes.startup.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.startup.periodSeconds }} + timeoutSeconds: {{ .Values.probes.startup.timeoutSeconds }} + failureThreshold: {{ .Values.probes.startup.failureThreshold }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext.container | nindent 12 }} + {{- if .Values.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: visits-data + labels: + {{- 
include "devops-python.labels" . | nindent 10 }} + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.persistence.size }} + {{- if .Values.persistence.storageClass }} + storageClassName: {{ .Values.persistence.storageClass | quote }} + {{- end }} + {{- end }} +{{- end }} diff --git a/k8s/devops-python/values-bluegreen.yaml b/k8s/devops-python/values-bluegreen.yaml new file mode 100644 index 0000000000..37330cd170 --- /dev/null +++ b/k8s/devops-python/values-bluegreen.yaml @@ -0,0 +1,15 @@ +# Lab 14 — blue-green: use with helm install/upgrade -f values-bluegreen.yaml +rollout: + strategy: blueGreen + blueGreen: + autoPromotionEnabled: false + scaleDownDelaySeconds: 30 + previewReplicaCount: 1 + +# Enough replicas to run active + preview stack during promotion +replicaCount: 2 + +service: + type: NodePort + nodePort: 30080 + previewNodePort: 30081 diff --git a/k8s/devops-python/values-dev.yaml b/k8s/devops-python/values-dev.yaml new file mode 100644 index 0000000000..c45120fb48 --- /dev/null +++ b/k8s/devops-python/values-dev.yaml @@ -0,0 +1,23 @@ +# Development overrides (Task 3) +# Lab 14 canary: use at least 2 replicas so setWeight percentages can schedule canary pods +replicaCount: 2 + +image: + tag: latest + +resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 100m + memory: 128Mi + +service: + type: NodePort + +probes: + liveness: + initialDelaySeconds: 5 + periodSeconds: 10 + diff --git a/k8s/devops-python/values-prod.yaml b/k8s/devops-python/values-prod.yaml new file mode 100644 index 0000000000..758c876207 --- /dev/null +++ b/k8s/devops-python/values-prod.yaml @@ -0,0 +1,26 @@ +# Production overrides (Task 3) +replicaCount: 5 + +image: + tag: "1.0.0" + +resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + +service: + # Local clusters may not support external LB, but prod is LoadBalancer-ready. 
+ type: LoadBalancer + +probes: + liveness: + initialDelaySeconds: 30 + periodSeconds: 5 + readiness: + initialDelaySeconds: 10 + periodSeconds: 3 + diff --git a/k8s/devops-python/values-servicemonitor.yaml b/k8s/devops-python/values-servicemonitor.yaml new file mode 100644 index 0000000000..2fab2d910b --- /dev/null +++ b/k8s/devops-python/values-servicemonitor.yaml @@ -0,0 +1,3 @@ +# Lab 16 bonus — Prometheus Operator discovers ServiceMonitors labelled release=RELEASE_NAME (the kube-prometheus-stack Helm release; set via serviceMonitor.releaseLabel) +serviceMonitor: + enabled: true diff --git a/k8s/devops-python/values-statefulset-ondelete.yaml b/k8s/devops-python/values-statefulset-ondelete.yaml new file mode 100644 index 0000000000..0393d897f4 --- /dev/null +++ b/k8s/devops-python/values-statefulset-ondelete.yaml @@ -0,0 +1,8 @@ +# Lab 15 bonus — OnDelete: pods adopt new spec only after manual pod delete +# Install: helm upgrade --install ... -f k8s/devops-python/values-statefulset.yaml -f k8s/devops-python/values-statefulset-ondelete.yaml +workload: + kind: statefulSet + +statefulSet: + updateStrategy: + type: OnDelete diff --git a/k8s/devops-python/values-statefulset-partition.yaml b/k8s/devops-python/values-statefulset-partition.yaml new file mode 100644 index 0000000000..111d760678 --- /dev/null +++ b/k8s/devops-python/values-statefulset-partition.yaml @@ -0,0 +1,10 @@ +# Lab 15 bonus — partition: only pods with ordinal >= partition get the new template on update +# Example: 3 replicas (0,1,2); partition 2 updates pod-2 first; 0,1 keep old until partition lowered +workload: + kind: statefulSet + +statefulSet: + updateStrategy: + type: RollingUpdate + rollingUpdate: + partition: 2 diff --git a/k8s/devops-python/values-statefulset.yaml b/k8s/devops-python/values-statefulset.yaml new file mode 100644 index 0000000000..a5c6205f27 --- /dev/null +++ b/k8s/devops-python/values-statefulset.yaml @@ -0,0 +1,7 @@ +# Lab 15 — StatefulSet with per-pod PVC + headless Service for stable DNS +workload: + kind: statefulSet + +replicaCount: 3 + +# Per-pod claims
use persistence.size / storageClass from base values.yaml diff --git a/k8s/devops-python/values.yaml b/k8s/devops-python/values.yaml new file mode 100644 index 0000000000..5b96c28808 --- /dev/null +++ b/k8s/devops-python/values.yaml @@ -0,0 +1,183 @@ +# Default values for devops-python. + +replicaCount: 3 + +# Lab 14 default: Argo Rollout. Lab 15: set workload.kind: statefulSet (see values-statefulset.yaml). +workload: + kind: rollout + +# Lab 15 — StatefulSet pod management and update strategy (partition / OnDelete bonus) +statefulSet: + podManagementPolicy: OrderedReady + updateStrategy: + type: RollingUpdate + rollingUpdate: + partition: 0 + +image: + repository: jambulancia/devops-info-service + tag: latest + pullPolicy: IfNotPresent + +nameOverride: "" +fullnameOverride: "" + +service: + enabled: true + type: NodePort + port: 80 + targetPort: 5000 + nodePort: 30080 + # Second NodePort for blue-green preview (Lab 14); set in values-bluegreen.yaml + previewNodePort: 30081 + +containerPort: 5000 + +env: + HOST: "0.0.0.0" + PORT: "5000" + LOG_FORMAT: "json" + VISITS_DATA_PATH: "/data/visits" + +# Lab 12 — ConfigMaps (file + env) +config: + enabled: true + injectEnv: true + environment: dev + logLevel: INFO + featureDebug: false + +# Lab 12 — persistence for visit counter (RWO: one node only; use 1 replica or RWX storage for multi-node) +# If disabled, set env.VISITS_DATA_PATH to a writable path (e.g. /tmp/visits); the container root FS is read-only. 
+persistence: + enabled: true + size: 100Mi + storageClass: "" + +# Lab 12 bonus — rolling restart when ConfigMap-relevant inputs change +configMapChecksum: + enabled: true + +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + +securityContext: + pod: + seccompProfile: + type: RuntimeDefault + container: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + +probes: + startup: + enabled: true + path: /health + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 20 + readiness: + enabled: true + path: /health + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + liveness: + enabled: true + path: /health + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + +# Lab 14 — Argo Rollouts (replaces apps/v1 Deployment; install controller: see k8s/rollouts/) +# strategy: canary (default) or blueGreen — set rollout.strategy (see values-bluegreen.yaml) +rollout: + strategy: canary + canary: + maxSurge: "25%" + maxUnavailable: 0 + blueGreen: + autoPromotionEnabled: false + scaleDownDelaySeconds: 30 + previewReplicaCount: 1 + analysis: + enabled: false + healthPath: /health + +# Lab 16 — init containers (kube-prometheus doc: k8s/MONITORING.md) +initContainers: + enabled: true + download: + enabled: true + image: busybox:1.36 + url: "http://example.com" + filename: "index.html" + waitForService: + enabled: true + image: busybox:1.36 + # Resolves in every cluster — Lab 16 “wait until dependency exists” pattern + host: "kubernetes.default.svc.cluster.local" + +# Lab 16 bonus — Prometheus Operator ServiceMonitor (install kube-prometheus-stack first) +serviceMonitor: + enabled: false + # Must match kube-prometheus-stack Helm release name (labels Prometheus resource) + releaseLabel: monitoring + interval: 30s + scrapeTimeout: 10s + +# Service 
account (required for Vault Kubernetes auth — Lab 11) +serviceAccount: + create: true + name: "" + +# Helm-managed Secret — use placeholders; override at install with --set-file or -f +secrets: + enabled: true + create: true + existingSecretName: "" + stringData: + username: "changeme-user" + password: "changeme-pass" + +# HashiCorp Vault Agent Injector (Lab 11 Task 3) +# Install Vault: see k8s/SECRETS.md — enable after Vault + auth + role exist. +vault: + role: devops-python + secretPath: secret/data/myapp/config + injector: + enabled: false + templateEnabled: false + extraAnnotations: {} + +hooks: + enabled: true + preInstall: + image: busybox:1.36 + command: + - sh + - -c + - "echo pre-install: basic validation && sleep 2 && echo pre-install: ok" + weight: -5 + postInstall: + image: busybox:1.36 + command: + - sh + - -c + - "echo post-install: smoke test placeholder && sleep 2 && echo post-install: ok" + weight: 5 + diff --git a/k8s/evidence/01-kind-create.txt b/k8s/evidence/01-kind-create.txt new file mode 100644 index 0000000000..fd5174b5a2 --- /dev/null +++ b/k8s/evidence/01-kind-create.txt @@ -0,0 +1,21 @@ +Creating cluster "devops-core" ... + • Ensuring node image (kindest/node:v1.32.2) 🖼 ... + ✓ Ensuring node image (kindest/node:v1.32.2) 🖼 + • Preparing nodes ... + ✓ Preparing nodes + • Writing configuration ... + ✓ Writing configuration + • Starting control-plane ️ ... + ✓ Starting control-plane ️ + • Installing CNI ... + ✓ Installing CNI + • Installing StorageClass ... + ✓ Installing StorageClass + • Waiting ≤ 2m0s for control-plane = Ready ... 
+ ✓ Waiting ≤ 2m0s for control-plane = Ready + • Ready after 18s +Set kubectl context to "kind-devops-core" +You can now use your cluster with: + +kubectl cluster-info --context kind-devops-core + diff --git a/k8s/evidence/02-kubectl-cluster-info.txt b/k8s/evidence/02-kubectl-cluster-info.txt new file mode 100644 index 0000000000..8f62b7ca0a --- /dev/null +++ b/k8s/evidence/02-kubectl-cluster-info.txt @@ -0,0 +1,4 @@ +Kubernetes control plane is running at https://127.0.0.1:45389 +CoreDNS is running at https://127.0.0.1:45389/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy + +To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'. diff --git a/k8s/evidence/03-kubectl-get-nodes.txt b/k8s/evidence/03-kubectl-get-nodes.txt new file mode 100644 index 0000000000..0c8c4c4751 --- /dev/null +++ b/k8s/evidence/03-kubectl-get-nodes.txt @@ -0,0 +1,2 @@ +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME +devops-core-control-plane Ready control-plane 40s v1.32.2 172.23.0.2 Debian GNU/Linux 12 (bookworm) 6.14.0-33-generic containerd://2.0.2 diff --git a/k8s/evidence/04-kubectl-get-namespaces.txt b/k8s/evidence/04-kubectl-get-namespaces.txt new file mode 100644 index 0000000000..89089408a1 --- /dev/null +++ b/k8s/evidence/04-kubectl-get-namespaces.txt @@ -0,0 +1,6 @@ +NAME STATUS AGE +default Active 40s +kube-node-lease Active 39s +kube-public Active 40s +kube-system Active 40s +local-path-storage Active 36s diff --git a/k8s/evidence/05-apply-lab9-manifests.txt b/k8s/evidence/05-apply-lab9-manifests.txt new file mode 100644 index 0000000000..ccebc60dcc --- /dev/null +++ b/k8s/evidence/05-apply-lab9-manifests.txt @@ -0,0 +1,2 @@ +deployment.apps/devops-python created +service/devops-python created diff --git a/k8s/evidence/06-rollout-status-initial.txt b/k8s/evidence/06-rollout-status-initial.txt new file mode 100644 index 0000000000..02fa0003b7 --- /dev/null +++ 
b/k8s/evidence/06-rollout-status-initial.txt @@ -0,0 +1,2 @@ +Waiting for deployment "devops-python" rollout to finish: 0 of 3 updated replicas are available... +error: timed out waiting for the condition diff --git a/k8s/evidence/07-get-pods-after-timeout.txt b/k8s/evidence/07-get-pods-after-timeout.txt new file mode 100644 index 0000000000..bfdaf29026 --- /dev/null +++ b/k8s/evidence/07-get-pods-after-timeout.txt @@ -0,0 +1,4 @@ +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +devops-python-86879c794d-d6jkn 0/1 CreateContainerConfigError 0 2m9s 10.244.0.5 devops-core-control-plane +devops-python-86879c794d-knwvj 0/1 CreateContainerConfigError 0 2m9s 10.244.0.6 devops-core-control-plane +devops-python-86879c794d-svr62 0/1 CreateContainerConfigError 0 2m9s 10.244.0.7 devops-core-control-plane diff --git a/k8s/evidence/08-describe-deployment-initial.txt b/k8s/evidence/08-describe-deployment-initial.txt new file mode 100644 index 0000000000..ace7ffb565 --- /dev/null +++ b/k8s/evidence/08-describe-deployment-initial.txt @@ -0,0 +1,47 @@ +Name: devops-python +Namespace: default +CreationTimestamp: Fri, 03 Apr 2026 13:27:44 +0300 +Labels: app=devops-python + lab=09 +Annotations: deployment.kubernetes.io/revision: 1 +Selector: app=devops-python +Replicas: 3 desired | 3 updated | 3 total | 0 available | 3 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 0 max unavailable, 1 max surge +Pod Template: + Labels: app=devops-python + lab=09 + Containers: + app: + Image: jambulancia/devops-info-service:latest + Port: 5000/TCP (http) + Host Port: 0/TCP (http) + Limits: + cpu: 200m + memory: 256Mi + Requests: + cpu: 100m + memory: 128Mi + Liveness: http-get http://:http/health delay=10s timeout=2s period=10s #success=1 #failure=3 + Readiness: http-get http://:http/health delay=5s timeout=2s period=5s #success=1 #failure=3 + Startup: http-get http://:http/health delay=0s timeout=2s period=5s #success=1 #failure=20 + 
Environment: + HOST: 0.0.0.0 + PORT: 5000 + LOG_FORMAT: json + Mounts: + Volumes: + Node-Selectors: + Tolerations: +Conditions: + Type Status Reason + ---- ------ ------ + Available False MinimumReplicasUnavailable + Progressing True ReplicaSetUpdated +OldReplicaSets: +NewReplicaSet: devops-python-86879c794d (3/3 replicas created) +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 2m10s deployment-controller Scaled up replica set devops-python-86879c794d from 0 to 3 diff --git a/k8s/evidence/09-events-tail.txt b/k8s/evidence/09-events-tail.txt new file mode 100644 index 0000000000..47b9f094b3 --- /dev/null +++ b/k8s/evidence/09-events-tail.txt @@ -0,0 +1,28 @@ +LAST SEEN TYPE REASON OBJECT MESSAGE +2m58s Normal Starting node/devops-core-control-plane Starting kubelet. +2m58s Normal NodeAllocatableEnforced node/devops-core-control-plane Updated Node Allocatable limit across pods +2m57s Normal NodeHasSufficientMemory node/devops-core-control-plane Node devops-core-control-plane status is now: NodeHasSufficientMemory +2m57s Normal NodeHasNoDiskPressure node/devops-core-control-plane Node devops-core-control-plane status is now: NodeHasNoDiskPressure +2m57s Normal NodeHasSufficientPID node/devops-core-control-plane Node devops-core-control-plane status is now: NodeHasSufficientPID +2m51s Normal RegisteredNode node/devops-core-control-plane Node devops-core-control-plane event: Registered Node devops-core-control-plane in Controller +2m48s Normal Starting node/devops-core-control-plane +2m38s Normal NodeReady node/devops-core-control-plane Node devops-core-control-plane status is now: NodeReady +2m13s Normal Scheduled pod/devops-python-86879c794d-svr62 Successfully assigned default/devops-python-86879c794d-svr62 to devops-core-control-plane +2m13s Normal SuccessfulCreate replicaset/devops-python-86879c794d Created pod: devops-python-86879c794d-d6jkn +2m13s Normal ScalingReplicaSet deployment/devops-python Scaled up replica 
set devops-python-86879c794d from 0 to 3 +2m13s Normal SuccessfulCreate replicaset/devops-python-86879c794d Created pod: devops-python-86879c794d-knwvj +2m13s Normal SuccessfulCreate replicaset/devops-python-86879c794d Created pod: devops-python-86879c794d-svr62 +2m13s Normal Scheduled pod/devops-python-86879c794d-knwvj Successfully assigned default/devops-python-86879c794d-knwvj to devops-core-control-plane +2m13s Normal Scheduled pod/devops-python-86879c794d-d6jkn Successfully assigned default/devops-python-86879c794d-d6jkn to devops-core-control-plane +2m12s Normal Pulling pod/devops-python-86879c794d-knwvj Pulling image "jambulancia/devops-info-service:latest" +2m12s Normal Pulling pod/devops-python-86879c794d-d6jkn Pulling image "jambulancia/devops-info-service:latest" +2m12s Normal Pulling pod/devops-python-86879c794d-svr62 Pulling image "jambulancia/devops-info-service:latest" +2m6s Normal Pulled pod/devops-python-86879c794d-knwvj Successfully pulled image "jambulancia/devops-info-service:latest" in 6.507s (6.507s including waiting). Image size: 44449395 bytes. +11s Warning Failed pod/devops-python-86879c794d-knwvj Error: container has runAsNonRoot and image has non-numeric user (appuser), cannot verify user is non-root (pod: "devops-python-86879c794d-knwvj_default(1ce96b4e-615e-4998-8af3-c298e795e54c)", container: app) +11s Normal Pulled pod/devops-python-86879c794d-knwvj Container image "jambulancia/devops-info-service:latest" already present on machine +1s Warning Failed pod/devops-python-86879c794d-d6jkn Error: container has runAsNonRoot and image has non-numeric user (appuser), cannot verify user is non-root (pod: "devops-python-86879c794d-d6jkn_default(03ba6b68-2af9-4e0d-9ab0-ccd581a9e880)", container: app) +2m5s Normal Pulled pod/devops-python-86879c794d-d6jkn Successfully pulled image "jambulancia/devops-info-service:latest" in 1.186s (7.693s including waiting). Image size: 44449395 bytes. 
+6s Warning Failed pod/devops-python-86879c794d-svr62 Error: container has runAsNonRoot and image has non-numeric user (appuser), cannot verify user is non-root (pod: "devops-python-86879c794d-svr62_default(59c75935-3327-4d2f-b98a-7d60f20780cc)", container: app) +2m4s Normal Pulled pod/devops-python-86879c794d-svr62 Successfully pulled image "jambulancia/devops-info-service:latest" in 1.256s (8.918s including waiting). Image size: 44449395 bytes. +1s Normal Pulled pod/devops-python-86879c794d-d6jkn Container image "jambulancia/devops-info-service:latest" already present on machine +6s Normal Pulled pod/devops-python-86879c794d-svr62 Container image "jambulancia/devops-info-service:latest" already present on machine diff --git a/k8s/evidence/10-apply-fix-runAsUser.txt b/k8s/evidence/10-apply-fix-runAsUser.txt new file mode 100644 index 0000000000..925c6fbbf6 --- /dev/null +++ b/k8s/evidence/10-apply-fix-runAsUser.txt @@ -0,0 +1 @@ +deployment.apps/devops-python configured diff --git a/k8s/evidence/11-rollout-status-after-fix.txt b/k8s/evidence/11-rollout-status-after-fix.txt new file mode 100644 index 0000000000..6438b11752 --- /dev/null +++ b/k8s/evidence/11-rollout-status-after-fix.txt @@ -0,0 +1,12 @@ +Waiting for deployment "devops-python" rollout to finish: 1 out of 3 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 1 out of 3 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 1 out of 3 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 1 out of 3 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 2 out of 3 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 2 out of 3 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 2 out of 3 new replicas have been updated... 
+Waiting for deployment "devops-python" rollout to finish: 2 out of 3 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 1 old replicas are pending termination... +Waiting for deployment "devops-python" rollout to finish: 1 old replicas are pending termination... +Waiting for deployment "devops-python" rollout to finish: 1 old replicas are pending termination... +deployment "devops-python" successfully rolled out diff --git a/k8s/evidence/12-get-pods-ready.txt b/k8s/evidence/12-get-pods-ready.txt new file mode 100644 index 0000000000..c1e4a89397 --- /dev/null +++ b/k8s/evidence/12-get-pods-ready.txt @@ -0,0 +1,5 @@ +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +devops-python-7df6f66dc8-gqqbt 1/1 Running 0 22s 10.244.0.8 devops-core-control-plane +devops-python-7df6f66dc8-qnpml 1/1 Running 0 16s 10.244.0.9 devops-core-control-plane +devops-python-7df6f66dc8-xz8j2 1/1 Running 0 6s 10.244.0.10 devops-core-control-plane +devops-python-86879c794d-svr62 0/1 Terminating 0 2m52s 10.244.0.7 devops-core-control-plane diff --git a/k8s/evidence/13-get-svc.txt b/k8s/evidence/13-get-svc.txt new file mode 100644 index 0000000000..db6aeca052 --- /dev/null +++ b/k8s/evidence/13-get-svc.txt @@ -0,0 +1,2 @@ +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR +devops-python NodePort 10.96.208.162 80:30080/TCP 3m18s app=devops-python diff --git a/k8s/evidence/14-get-endpoints.txt b/k8s/evidence/14-get-endpoints.txt new file mode 100644 index 0000000000..74e8a83a50 --- /dev/null +++ b/k8s/evidence/14-get-endpoints.txt @@ -0,0 +1,2 @@ +NAME ENDPOINTS AGE +devops-python 10.244.0.10:5000,10.244.0.8:5000,10.244.0.9:5000 3m18s diff --git a/k8s/evidence/15-port-forward.log b/k8s/evidence/15-port-forward.log new file mode 100644 index 0000000000..674ffb5265 --- /dev/null +++ b/k8s/evidence/15-port-forward.log @@ -0,0 +1,4 @@ +Forwarding from 127.0.0.1:8080 -> 5000 +Forwarding from [::1]:8080 -> 5000 +Handling connection for 
8080 +Handling connection for 8080 diff --git a/k8s/evidence/15-port-forward.pid b/k8s/evidence/15-port-forward.pid new file mode 100644 index 0000000000..72ff078afd --- /dev/null +++ b/k8s/evidence/15-port-forward.pid @@ -0,0 +1 @@ +28477 diff --git a/k8s/evidence/16-curl-service.txt b/k8s/evidence/16-curl-service.txt new file mode 100644 index 0000000000..eb15cb2e2e --- /dev/null +++ b/k8s/evidence/16-curl-service.txt @@ -0,0 +1,17 @@ +HTTP/1.1 200 OK +Server: Werkzeug/3.1.5 Python/3.13.12 +Date: Fri, 03 Apr 2026 10:31:11 GMT +Content-Type: application/json +Content-Length: 88 +Connection: close + +{"status":"healthy","timestamp":"2026-04-03T10:31:11.555113+00:00","uptime_seconds":51} +--- +HTTP/1.1 200 OK +Server: Werkzeug/3.1.5 Python/3.13.12 +Date: Fri, 03 Apr 2026 10:31:11 GMT +Content-Type: application/json +Content-Length: 710 +Connection: close + +{"endpoints":[{"description":"Service information","method":"GET","path":"/"},{"description":"Health check","method":"GET","path":"/health"}],"request":{"client_ip":"127.0.0.1","method":"GET","path":"/","user_agent":"curl/8.12.1"},"runtime":{"current_time":"2026-04-03T10:31:11.568634+00:00","timezone":"UTC","uptime_human":"0 hours, 0 minutes","uptime_seconds":51},"service":{"description":"DevOps course info service","framework":"Flask","name":"devops-info-service","version":"1.0.0"},"system":{"architecture":"x86_64","cpu_count":22,"hostname":"devops-python-7df6f66dc8-gqqbt","platform":"Linux","platform_version":"#33-Ubuntu SMP PREEMPT_DYNAMIC Wed Sep 17 23:22:02 UTC 2025","python_version":"3.13.12"}} diff --git a/k8s/evidence/17-kubectl-get-all.txt b/k8s/evidence/17-kubectl-get-all.txt new file mode 100644 index 0000000000..ac175542b8 --- /dev/null +++ b/k8s/evidence/17-kubectl-get-all.txt @@ -0,0 +1,15 @@ +NAME READY STATUS RESTARTS AGE +pod/devops-python-7df6f66dc8-gqqbt 1/1 Running 0 67s +pod/devops-python-7df6f66dc8-qnpml 1/1 Running 0 61s +pod/devops-python-7df6f66dc8-xz8j2 1/1 Running 0 51s + +NAME TYPE 
CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/devops-python NodePort 10.96.208.162 80:30080/TCP 3m37s +service/kubernetes ClusterIP 10.96.0.1 443/TCP 4m22s + +NAME READY UP-TO-DATE AVAILABLE AGE +deployment.apps/devops-python 3/3 3 3 3m37s + +NAME DESIRED CURRENT READY AGE +replicaset.apps/devops-python-7df6f66dc8 3 3 3 67s +replicaset.apps/devops-python-86879c794d 0 0 0 3m37s diff --git a/k8s/evidence/18-describe-deployment-final.txt b/k8s/evidence/18-describe-deployment-final.txt new file mode 100644 index 0000000000..afb3b6591f --- /dev/null +++ b/k8s/evidence/18-describe-deployment-final.txt @@ -0,0 +1,53 @@ +Name: devops-python +Namespace: default +CreationTimestamp: Fri, 03 Apr 2026 13:27:44 +0300 +Labels: app=devops-python + lab=09 +Annotations: deployment.kubernetes.io/revision: 2 +Selector: app=devops-python +Replicas: 3 desired | 3 updated | 3 total | 3 available | 0 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 0 max unavailable, 1 max surge +Pod Template: + Labels: app=devops-python + lab=09 + Containers: + app: + Image: jambulancia/devops-info-service:latest + Port: 5000/TCP (http) + Host Port: 0/TCP (http) + Limits: + cpu: 200m + memory: 256Mi + Requests: + cpu: 100m + memory: 128Mi + Liveness: http-get http://:http/health delay=10s timeout=2s period=10s #success=1 #failure=3 + Readiness: http-get http://:http/health delay=5s timeout=2s period=5s #success=1 #failure=3 + Startup: http-get http://:http/health delay=0s timeout=2s period=5s #success=1 #failure=20 + Environment: + HOST: 0.0.0.0 + PORT: 5000 + LOG_FORMAT: json + Mounts: + Volumes: + Node-Selectors: + Tolerations: +Conditions: + Type Status Reason + ---- ------ ------ + Available True MinimumReplicasAvailable + Progressing True NewReplicaSetAvailable +OldReplicaSets: devops-python-86879c794d (0/0 replicas created) +NewReplicaSet: devops-python-7df6f66dc8 (3/3 replicas created) +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + 
Normal ScalingReplicaSet 3m37s deployment-controller Scaled up replica set devops-python-86879c794d from 0 to 3 + Normal ScalingReplicaSet 67s deployment-controller Scaled up replica set devops-python-7df6f66dc8 from 0 to 1 + Normal ScalingReplicaSet 61s deployment-controller Scaled down replica set devops-python-86879c794d from 3 to 2 + Normal ScalingReplicaSet 61s deployment-controller Scaled up replica set devops-python-7df6f66dc8 from 1 to 2 + Normal ScalingReplicaSet 51s deployment-controller Scaled down replica set devops-python-86879c794d from 2 to 1 + Normal ScalingReplicaSet 51s deployment-controller Scaled up replica set devops-python-7df6f66dc8 from 2 to 3 + Normal ScalingReplicaSet 45s deployment-controller Scaled down replica set devops-python-86879c794d from 1 to 0 diff --git a/k8s/evidence/19-scale-to-5.txt b/k8s/evidence/19-scale-to-5.txt new file mode 100644 index 0000000000..fa74201c01 --- /dev/null +++ b/k8s/evidence/19-scale-to-5.txt @@ -0,0 +1 @@ +deployment.apps/devops-python scaled diff --git a/k8s/evidence/20-rollout-status-scale.txt b/k8s/evidence/20-rollout-status-scale.txt new file mode 100644 index 0000000000..fcc499286b --- /dev/null +++ b/k8s/evidence/20-rollout-status-scale.txt @@ -0,0 +1,3 @@ +Waiting for deployment "devops-python" rollout to finish: 3 of 5 updated replicas are available... +Waiting for deployment "devops-python" rollout to finish: 4 of 5 updated replicas are available... 
+deployment "devops-python" successfully rolled out diff --git a/k8s/evidence/21-get-pods-after-scale.txt b/k8s/evidence/21-get-pods-after-scale.txt new file mode 100644 index 0000000000..b097472cf9 --- /dev/null +++ b/k8s/evidence/21-get-pods-after-scale.txt @@ -0,0 +1,6 @@ +NAME READY STATUS RESTARTS AGE +devops-python-7df6f66dc8-55rzb 1/1 Running 0 10s +devops-python-7df6f66dc8-99fjl 1/1 Running 0 10s +devops-python-7df6f66dc8-gqqbt 1/1 Running 0 84s +devops-python-7df6f66dc8-qnpml 1/1 Running 0 78s +devops-python-7df6f66dc8-xz8j2 1/1 Running 0 68s diff --git a/k8s/evidence/22-rollout-history-before-update.txt b/k8s/evidence/22-rollout-history-before-update.txt new file mode 100644 index 0000000000..b83660f9b8 --- /dev/null +++ b/k8s/evidence/22-rollout-history-before-update.txt @@ -0,0 +1,5 @@ +deployment.apps/devops-python +REVISION CHANGE-CAUSE +1 +2 + diff --git a/k8s/evidence/23-set-image-update.txt b/k8s/evidence/23-set-image-update.txt new file mode 100644 index 0000000000..72b9386f85 --- /dev/null +++ b/k8s/evidence/23-set-image-update.txt @@ -0,0 +1,2 @@ +Flag --record has been deprecated, --record will be removed in the future +deployment.apps/devops-python image updated diff --git a/k8s/evidence/24-rollout-status-after-update.txt b/k8s/evidence/24-rollout-status-after-update.txt new file mode 100644 index 0000000000..da95bd9e8d --- /dev/null +++ b/k8s/evidence/24-rollout-status-after-update.txt @@ -0,0 +1 @@ +deployment "devops-python" successfully rolled out diff --git a/k8s/evidence/25-rollout-history-after-update.txt b/k8s/evidence/25-rollout-history-after-update.txt new file mode 100644 index 0000000000..f113660f49 --- /dev/null +++ b/k8s/evidence/25-rollout-history-after-update.txt @@ -0,0 +1,5 @@ +deployment.apps/devops-python +REVISION CHANGE-CAUSE +1 +2 kubectl set image deployment/devops-python app=jambulancia/devops-info-service:latest --record=true + diff --git a/k8s/evidence/26-rollout-undo-to-rev1.txt 
b/k8s/evidence/26-rollout-undo-to-rev1.txt new file mode 100644 index 0000000000..ad89b4a216 --- /dev/null +++ b/k8s/evidence/26-rollout-undo-to-rev1.txt @@ -0,0 +1 @@ +deployment.apps/devops-python rolled back diff --git a/k8s/evidence/27-rollout-status-after-undo.txt b/k8s/evidence/27-rollout-status-after-undo.txt new file mode 100644 index 0000000000..f90a3cbfc5 --- /dev/null +++ b/k8s/evidence/27-rollout-status-after-undo.txt @@ -0,0 +1,2 @@ +Waiting for deployment "devops-python" rollout to finish: 1 out of 5 new replicas have been updated... +Waiting for deployment "devops-python" rollout to finish: 1 out of 5 new replicas have been updated... diff --git a/k8s/monitoring/install-kube-prometheus-stack.sh b/k8s/monitoring/install-kube-prometheus-stack.sh new file mode 100755 index 0000000000..52f102e6ad --- /dev/null +++ b/k8s/monitoring/install-kube-prometheus-stack.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Lab 16 Task 1 — kube-prometheus-stack (cluster monitoring) +# Ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack +set -euo pipefail + +NS="${KUBE_PROMETHEUS_NAMESPACE:-monitoring}" +RELEASE="${KUBE_PROMETHEUS_RELEASE:-monitoring}" + +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update + +kubectl get namespace "$NS" >/dev/null 2>&1 || kubectl create namespace "$NS" + +if helm status "$RELEASE" -n "$NS" >/dev/null 2>&1; then + echo "Helm release '$RELEASE' already in $NS; skipping install." + echo "Upgrade: helm upgrade $RELEASE prometheus-community/kube-prometheus-stack -n $NS --reuse-values" +else + echo "Installing kube-prometheus-stack as '$RELEASE' in namespace $NS..." + helm install "$RELEASE" prometheus-community/kube-prometheus-stack \ + --namespace "$NS" \ + --set grafana.adminPassword=admin +fi + +echo "Waiting for core pods (timeout 300s)..." 
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=prometheus -n "$NS" --timeout=300s 2>/dev/null || true +kubectl get pods,svc -n "$NS" + +echo "" +echo "Grafana: kubectl port-forward svc/${RELEASE}-grafana -n ${NS} 3000:80" +echo " User admin / password: prom-operator (unless overridden) or the password from:" +echo " kubectl get secret ${RELEASE}-grafana -o jsonpath='{.data.admin-password}' | base64 -d; echo" +echo "Prometheus: kubectl port-forward svc/${RELEASE}-kube-prometheus-prometheus -n ${NS} 9090:9090" +echo "Alertmanager: kubectl port-forward svc/${RELEASE}-kube-prometheus-alertmanager -n ${NS} 9093:9093" +echo "" diff --git a/k8s/rollouts/install-argo-rollouts.sh b/k8s/rollouts/install-argo-rollouts.sh new file mode 100755 index 0000000000..bb71b542bf --- /dev/null +++ b/k8s/rollouts/install-argo-rollouts.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Lab 14 — Argo Rollouts controller + dashboard (Task 1). +# Docs: https://argoproj.github.io/argo-rollouts/installation/ +set -euo pipefail + +NS=argo-rollouts +INSTALL_URL="https://github.com/argoproj/argo-rollouts/releases/latest/download/install.yaml" +DASH_URL="https://github.com/argoproj/argo-rollouts/releases/latest/download/dashboard-install.yaml" + +echo "Creating namespace ${NS} (if needed)..." +kubectl get namespace "$NS" >/dev/null 2>&1 || kubectl create namespace "$NS" + +echo "Installing Argo Rollouts controller..." +kubectl apply -n "$NS" -f "$INSTALL_URL" + +echo "Installing Rollouts dashboard..." +kubectl apply -n "$NS" -f "$DASH_URL" + +echo "Waiting for rollout controller to be available..." +kubectl rollout status deploy/argo-rollouts -n "$NS" --timeout=180s +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argo-rollouts-dashboard -n "$NS" --timeout=120s 2>/dev/null || true + +echo "" +echo "kubectl plugin (install separately):" +echo " https://argoproj.github.io/argo-rollouts/installation/#kubectl-plugin-installation" +echo " e.g. 
Linux amd64: curl -LO .../kubectl-argo-rollouts-linux-amd64 && chmod +x && sudo mv ... /usr/local/bin/kubectl-argo-rollouts" +echo "" +echo "Dashboard: kubectl port-forward svc/argo-rollouts-dashboard -n ${NS} 3100:3100" +echo " http://localhost:3100" +echo "CLI: kubectl argo rollouts version" +echo " kubectl argo rollouts get rollout -n -w" +echo "" diff --git a/k8s/service.yml b/k8s/service.yml new file mode 100644 index 0000000000..fb4e3bf222 --- /dev/null +++ b/k8s/service.yml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-python + labels: + app: devops-python + lab: "09" +spec: + type: NodePort + selector: + app: devops-python + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 5000 + nodePort: 30080 diff --git a/monitoring/.env.example b/monitoring/.env.example new file mode 100644 index 0000000000..45a0ac5803 --- /dev/null +++ b/monitoring/.env.example @@ -0,0 +1,3 @@ +# Production Grafana config. Copy to .env and customize. Do not commit .env. 
+GF_AUTH_ANONYMOUS_ENABLED=false +GF_SECURITY_ADMIN_PASSWORD=changeme diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000000..35b2935f8b --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,174 @@ +services: + loki: + image: grafana/loki:3.0.0 + container_name: loki + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + command: -config.file=/etc/loki/config.yml + networks: + - logging + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.25" + memory: 256M + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + labels: + logging: "promtail" + app: "loki" + + promtail: + image: grafana/promtail:3.0.0 + container_name: promtail + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + command: -config.file=/etc/promtail/config.yml + networks: + - logging + depends_on: + loki: + condition: service_healthy + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.1" + memory: 128M + labels: + logging: "promtail" + app: "promtail" + + prometheus: + image: prom/prometheus:v3.9.0 + container_name: prometheus + ports: + - "9090:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --storage.tsdb.retention.time=15d + - --storage.tsdb.retention.size=10GB + - --web.enable-lifecycle + networks: + - logging + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.25" + memory: 256M + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 
--spider http://localhost:9090/-/healthy || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + + grafana: + image: grafana/grafana:12.3.1 + container_name: grafana + ports: + - "3000:3000" + environment: + # Dev: anonymous enabled. For production, set GF_AUTH_ANONYMOUS_ENABLED=false in .env + GF_AUTH_ANONYMOUS_ENABLED: "${GF_AUTH_ANONYMOUS_ENABLED:-true}" + GF_AUTH_ANONYMOUS_ORG_ROLE: Admin + GF_SECURITY_ALLOW_EMBEDDING: "true" + GF_SERVER_ROOT_URL: "http://localhost:3000" + GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:-admin}" + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning:ro + networks: + - logging + depends_on: + loki: + condition: service_healthy + prometheus: + condition: service_healthy + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.1" + memory: 128M + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + labels: + logging: "promtail" + app: "grafana" + + app-python: + build: + context: ../app_python + dockerfile: Dockerfile + image: jambulancia/devops-info-service:latest + container_name: devops-python + ports: + - "8000:5000" + # Uncomment for direct Loki push (requires: docker plugin install grafana/loki-docker-driver:3.6.0 --alias loki --grant-all-permissions) + # logging: + # driver: loki + # options: + # loki-url: "http://loki:3100/loki/api/v1/push" + # loki-external-labels: "job=docker,app=devops-python" + environment: + PORT: "5000" + HOST: "0.0.0.0" + LOG_FORMAT: "json" + VISITS_DATA_PATH: "/data/visits" + volumes: + - ./data:/data + networks: + - logging + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + healthcheck: + test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:5000/health')\" || exit 1"] + interval: 10s + timeout: 5s + retries: 5 
+ start_period: 5s + labels: + logging: "promtail" + app: "devops-python" + +volumes: + loki-data: + grafana-data: + prometheus-data: + +networks: + logging: + driver: bridge diff --git a/monitoring/docs/LAB07.md b/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..563ddd0096 --- /dev/null +++ b/monitoring/docs/LAB07.md @@ -0,0 +1,228 @@ +# Lab 7 — Observability & Logging with Loki Stack + + +## 1. Architecture + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ app-python │ │ Loki │ │ Grafana │ +│ (Flask) │ │ (Log Storage) │ │ (Visualization)│ +│ Port 8000 │ │ Port 3100 │ │ Port 3000 │ +└────────┬────────┘ └────────▲────────┘ └────────▲────────┘ + │ │ │ + │ JSON logs │ push │ query + │ (stdout) │ │ + ▼ │ │ +┌─────────────────┐ ┌────────┴────────┐ ┌───────┴────────┐ +│ Docker │ │ Promtail │ │ Loki Data │ +│ (json-file │────▶│ (Log Collector)│────▶│ Source │ +│ log driver) │ │ Port 9080 │ │ (preconfigured)│ +└─────────────────┘ └─────────────────┘ └────────────────┘ + │ │ + │ /var/lib/docker/ │ Docker SD + │ containers │ + filters + └───────────────────────┘ + +All services run on the `logging` bridge network. +Promtail discovers containers via Docker socket, reads log files, and pushes to Loki. +``` + +**Component roles:** +- **Loki:** Stores logs with TSDB index; 7-day retention; compactor cleans old data +- **Promtail:** Discovers containers (label `logging=promtail`), reads Docker log files, pushes to Loki +- **Grafana:** Queries Loki via LogQL, dashboards, Explore + +--- + +## 2. 
Setup Guide + +### Prerequisites +- Docker and Docker Compose v2 +- Lab 1 Python app built as `jambulancia/devops-info-service:latest` + +### Deploy + +```bash +cd monitoring +docker compose up -d +docker compose ps +``` + +### Verify + +```bash +# Loki readiness +curl http://localhost:3100/ready + +# Promtail targets (log discovery) +curl http://localhost:9080/targets + +# Grafana +open http://localhost:3000 +``` + +### Configure Loki Data Source in Grafana + +1. **Connections** → **Data sources** → **Add data source** → **Loki** +2. URL: `http://loki:3100` +3. **Save & Test** + +### Generate Logs + +```bash +for i in {1..20}; do curl http://localhost:8000/; done +for i in {1..20}; do curl http://localhost:8000/health; done +``` + +### Alternative: Loki Docker Logging Driver + +If Promtail Docker SD does not push logs in your environment, you can use the Loki Docker logging driver for the app: + +```bash +docker plugin install grafana/loki-docker-driver:3.6.0 --alias loki --grant-all-permissions +``` + +Then add to `app-python` in docker-compose: + +```yaml + logging: + driver: loki + options: + loki-url: "http://loki:3100/loki/api/v1/push" + loki-external-labels: "job=docker,app=devops-python" +``` + +--- + +## 3. 
Configuration + +### Loki (`loki/config.yml`) + +- **Schema:** v13 with TSDB and filesystem storage +- **Retention:** 168h (7 days) via `limits_config.retention_period` +- **Compactor:** Enabled to delete data beyond retention + +```yaml +limits_config: + retention_period: 168h + +compactor: + retention_enabled: true +``` + +### Promtail (`promtail/config.yml`) + +- **Docker SD:** Discovers containers with label `logging=promtail` +- **`__path__` relabel:** Points to `/var/lib/docker/containers/${id}/*.log` so Promtail reads Docker log files +- **Labels:** `container` (from name), `service` (from compose service label) +- **Pipeline:** `docker: {}` parses Docker JSON log wrapper + +```yaml +relabel_configs: + - source_labels: ['__meta_docker_container_id'] + regex: '(.+)' + target_label: __path__ + replacement: /var/lib/docker/containers/${1}/*.log + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container' + - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + target_label: 'service' +``` + +--- + +## 4. Application Logging + +The Python app uses `python-json-logger` when `LOG_FORMAT=json` is set. + +- **Format:** `{"timestamp": "...", "level": "...", "message": "...", "method": "...", "path": "...", ...}` +- **Events logged:** Startup, request received, response sent, 404, 500 +- **Context:** method, path, status_code, client_ip + +```python +# app_python/app.py +from pythonjsonlogger import jsonlogger + +if USE_JSON_LOGGING: + formatter = jsonlogger.JsonFormatter() + handler.setFormatter(formatter) +``` + +Docker Compose sets `LOG_FORMAT: "json"` for the app service. + +--- + +## 5. 
Dashboard + +Create a dashboard with 4 panels: + +| Panel | Type | LogQL Query | +|--------------------|-------------|-----------------------------------------------------------------------------| +| Logs Table | Logs | `{app=~"devops-.*"}` | +| Request Rate | Time series | `sum by (app) (rate({app=~"devops-.*"} [1m]))` | +| Error Logs | Logs | `{app=~"devops-.*"} \| json \| level="ERROR"` | +| Log Level Distribution | Stat/Pie | `sum by (level) (count_over_time({app=~"devops-.*"} \| json [5m]))` | + +**How to create:** +1. **Dashboard** → **New** → **New Dashboard** → **Add visualization** +2. Select **Loki** data source +3. Enter LogQL and choose visualization type + +--- + +## 6. Production Config + +- **Resource limits:** All services have `deploy.resources.limits` (CPU, memory) +- **Health checks:** Loki (`/ready`), Grafana (`/api/health`) +- **Grafana security:** Disable anonymous auth for production: + - `GF_AUTH_ANONYMOUS_ENABLED: "false"` + - `GF_SECURITY_ADMIN_PASSWORD` from `.env` (do not commit) +- **Secrets:** Use `.env` for Grafana admin password; add to `.gitignore` + +--- + +## 7. Testing + +```bash +# Full stack health +cd monitoring +docker compose ps + +# Loki +curl -s http://localhost:3100/ready + +# Promtail targets +curl -s http://localhost:9080/targets | head -50 + +# Generate traffic +for i in {1..10}; do curl -s http://localhost:8000/ > /dev/null; done +for i in {1..10}; do curl -s http://localhost:8000/health > /dev/null; done +``` + +**Grafana Explore:** Run `{job="docker"}` or `{app="devops-python"}` to confirm logs appear. + +--- + +## 8. Challenges & Solutions + +| Challenge | Solution | +|------------------------------------|--------------------------------------------------------------------------| +| Promtail Docker SD not pushing logs| Ensure Docker socket + `/var/lib/docker/containers` are mounted. Try `curl localhost:9080/targets`. 
Alternative: use [Loki Docker logging driver](https://grafana.com/docs/loki/latest/send-data/docker-driver/) | +| Too many containers discovered | Use `filters: - name: label values: ["logging=promtail"]` in Docker SD | +| JSON parsing in LogQL | Use `\| json` pipeline stage; filter with `level="ERROR"` | +| Label vs service name | Use `__meta_docker_container_label_com_docker_compose_service` for app | +| Loki compactor config error | Add `delete_request_store: filesystem` when `retention_enabled: true` | + +--- + +## Evidence Checklist + +- [x] Loki, Promtail, Grafana running via Docker Compose +- [x] Loki data source in Grafana +- [x] Python app with JSON logging +- [x] Logs visible in Grafana from all labeled containers +- [x] Dashboard with 4 panels +- [x] LogQL queries for streams, errors, rates, levels +- [x] Resource limits and health checks +- [x] LAB07.md with setup and config notes diff --git a/monitoring/docs/LAB08.md b/monitoring/docs/LAB08.md new file mode 100644 index 0000000000..413b2bbb71 --- /dev/null +++ b/monitoring/docs/LAB08.md @@ -0,0 +1,181 @@ +# Lab 8 — Metrics & Monitoring with Prometheus + +**Completion Date:** February 2026 +**Tech Stack:** Prometheus 3.9 + Grafana 12.3 + prometheus_client 0.23 + +--- + +## 1. Architecture + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ app-python │ │ Prometheus │ │ Grafana │ +│ /metrics │◀────│ (scrape) │◀────│ (query) │ +│ Port 5000 │ │ Port 9090 │ │ Port 3000 │ +└────────┬────────┘ └────────▲────────┘ └─────────────────┘ + │ │ + │ pull /metrics │ PromQL + │ │ + │ ┌────────┴────────┐ + │ │ Loki, Grafana │ + │ │ (self-metrics) │ + └──────────────┴─────────────────┘ + +Metric flow: App exposes /metrics → Prometheus scrapes every 15s → Grafana queries via PromQL +``` + +--- + +## 2. 
Application Instrumentation + +### Metrics Implemented (RED Method) + +| Metric | Type | Labels | Purpose | +|--------|------|--------|---------| +| `http_requests_total` | Counter | method, endpoint, status | **R**ate – total requests, error counting | +| `http_request_duration_seconds` | Histogram | method, endpoint | **D**uration – latency distribution | +| `http_requests_in_progress` | Gauge | — | Active/concurrent requests | +| `devops_info_endpoint_calls` | Counter | endpoint | Endpoint usage | +| `devops_info_system_collection_seconds` | Histogram | — | System info collection time (/) | + +### Endpoint Normalization + +Paths are normalized for low cardinality: +- `/`, `/health`, `/metrics` → used as-is +- Other paths (e.g. 404) → `other` + +### Code Snippet + +```python +from prometheus_client import Counter, Histogram, Gauge, generate_latest + +http_requests_total = Counter( + "http_requests_total", "Total HTTP requests", + ["method", "endpoint", "status"]) +http_request_duration_seconds = Histogram( + "http_request_duration_seconds", "HTTP request duration", + ["method", "endpoint"], buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0)) +http_requests_in_progress = Gauge("http_requests_in_progress", "Requests in progress") +``` + +--- + +## 3. Prometheus Configuration + +**File:** `monitoring/prometheus/prometheus.yml` + +```yaml +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: ["localhost:9090"] + - job_name: app + static_configs: + - targets: ["app-python:5000"] + metrics_path: /metrics + - job_name: loki + static_configs: + - targets: ["loki:3100"] + metrics_path: /metrics + - job_name: grafana + static_configs: + - targets: ["grafana:3000"] + metrics_path: /metrics +``` + +**Retention:** 15 days, 10GB (via command flags in docker-compose) + +--- + +## 4. 
Dashboard Walkthrough + +**Dashboard:** DevOps App Metrics (provisioned) + +| Panel | Type | Query | Purpose | +|-------|------|-------|---------| +| Request Rate | Time series | `sum(rate(http_requests_total{job="app"}[5m])) by (endpoint)` | Requests/sec per endpoint | +| Error Rate | Time series | `sum(rate(http_requests_total{job="app",status=~"5.."}[5m]))` | 5xx errors/sec | +| Request Duration p95 | Time series | `histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job="app"}[5m])) by (le, endpoint))` | 95th percentile latency | +| Active Requests | Gauge | `http_requests_in_progress{job="app"}` | Concurrent requests | +| Status Code Distribution | Pie chart | `sum by (status) (rate(http_requests_total{job="app"}[5m]))` | 2xx vs 4xx vs 5xx | +| Service Uptime | Stat | `up{job="app"}` | 1=UP, 0=DOWN | +| Request Duration Heatmap | Heatmap | `sum(rate(http_request_duration_seconds_bucket{job="app"}[5m])) by (le)` | Latency distribution | + +--- + +## 5. PromQL Examples + +```promql +# Request rate (req/s) +rate(http_requests_total[5m]) +sum(rate(http_requests_total{job="app"}[5m])) by (endpoint) + +# Error rate (5xx) +sum(rate(http_requests_total{status=~"5.."}[5m])) + +# p95 latency +histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, endpoint)) + +# Active requests +http_requests_in_progress + +# Service health +up{job="app"} # 1 = up, 0 = down +``` + +--- + +## 6. Production Setup + +- **Health checks:** Prometheus (`/-/healthy`), app (`/health`) +- **Resource limits:** Prometheus 1G/1 CPU; Grafana 512M/0.5 CPU; app 256M/0.5 CPU +- **Retention:** 15d, 10GB +- **Volumes:** `prometheus-data`, `loki-data`, `grafana-data` for persistence + +--- + +## 7. 
Testing + +```bash +cd monitoring +docker compose up -d +docker compose ps # all healthy + +# App metrics +curl http://localhost:8000/metrics + +# Prometheus targets +open http://localhost:9090/targets # all UP + +# Grafana +open http://localhost:3000 +# Dashboards → DevOps App Metrics +``` + +**Generate traffic:** +```bash +for i in {1..50}; do curl -s http://localhost:8000/ > /dev/null; done +for i in {1..50}; do curl -s http://localhost:8000/health > /dev/null; done +``` + +--- + +## 8. Challenges & Solutions + +| Challenge | Solution | +|-----------|----------| +| App healthcheck in slim image | Use `python -c "import urllib.request; urllib.request.urlopen(...)"` instead of curl | +| Datasource UID for provisioning | Set `uid: prometheus` in datasources.yaml so dashboards resolve | +| Prometheus 3.x storage config | Use CLI flags `--storage.tsdb.retention.time` and `--storage.tsdb.retention.size` | + +--- + +## Metrics vs Logs (Lab 7) + +- **Metrics:** Aggregated numbers (rate, latency, counts); good for dashboards and alerting +- **Logs:** Per-event records with context; good for debugging and audit +- **Together:** Metrics for trends and SLOs, logs for root cause analysis diff --git a/monitoring/grafana/provisioning/dashboards/dashboard.yml b/monitoring/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000000..636f645410 --- /dev/null +++ b/monitoring/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: + - name: "default" + orgId: 1 + folder: "" + type: file + disableDeletion: false + updateIntervalSeconds: 30 + options: + path: /etc/grafana/provisioning/dashboards diff --git a/monitoring/grafana/provisioning/dashboards/devops-app-metrics.json b/monitoring/grafana/provisioning/dashboards/devops-app-metrics.json new file mode 100644 index 0000000000..24e414386c --- /dev/null +++ b/monitoring/grafana/provisioning/dashboards/devops-app-metrics.json @@ -0,0 +1,166 @@ +{ + "annotations": { + 
"list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "id": 1, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(http_requests_total{job=\"app\"}[5m])) by (endpoint)", + "legendFormat": "{{endpoint}}", + "refId": "A" + } + ], + "title": "Request Rate by Endpoint", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "id": 2, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(http_requests_total{job=\"app\",status=~\"5..\"}[5m]))", + "legendFormat": "5xx errors", + "refId": "A" + } + ], + "title": "Error Rate (5xx)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "unit": "s" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 3, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=\"app\"}[5m])) by (le, endpoint))", + "legendFormat": "p95 {{endpoint}}", + "refId": "A" + } + ], + "title": "Request Duration p95", + "type": "timeseries" + }, + 
{ + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "thresholds" }, "unit": "short" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 4, + "options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"] } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "http_requests_in_progress{job=\"app\"}", + "refId": "A" + } + ], + "title": "Active Requests", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" } }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, + "id": 5, + "options": { "legend": { "displayMode": "list", "placement": "right" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (status) (rate(http_requests_total{job=\"app\"}[5m]))", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Status Code Distribution", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [{ "options": { "0": { "color": "red", "text": "DOWN" } }, "type": "value" }, { "options": { "1": { "color": "green", "text": "UP" } }, "type": "value" }] + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, + "id": 6, + "options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"] } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "up{job=\"app\"}", + "refId": "A" + } + ], + "title": "Service Uptime", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "scheme" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 24, 
"x": 0, "y": 24 }, + "id": 7, + "options": { "calculateFromStack": true }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(http_request_duration_seconds_bucket{job=\"app\"}[5m])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Request Duration Heatmap", + "type": "heatmap" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["devops", "app", "lab8"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "DevOps App Metrics", + "uid": "devops-app-metrics", + "version": 1 +} diff --git a/monitoring/grafana/provisioning/datasources/datasources.yaml b/monitoring/grafana/provisioning/datasources/datasources.yaml new file mode 100644 index 0000000000..7323026349 --- /dev/null +++ b/monitoring/grafana/provisioning/datasources/datasources.yaml @@ -0,0 +1,16 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + editable: false diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..2d54cfb545 --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,50 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + log_level: info + +common: + path_prefix: /loki + replication_factor: 1 + ring: + kvstore: + store: inmemory + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +limits_config: + retention_period: 168h # 7 days + +compactor: + working_directory: /loki/compactor + compaction_interval: 10m + retention_enabled: true + delete_request_store: filesystem + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 
+ index: + prefix: index_ + period: 24h + +storage_config: + filesystem: + directory: /loki/chunks + +ruler: + alertmanager_url: http://localhost:9093 + storage: + type: local + local: + directory: /loki/rules diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000000..706f8d6bcb --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: ["localhost:9090"] + + - job_name: app + static_configs: + - targets: ["app-python:5000"] + metrics_path: /metrics + + - job_name: loki + static_configs: + - targets: ["loki:3100"] + metrics_path: /metrics + + - job_name: grafana + static_configs: + - targets: ["grafana:3000"] + metrics_path: /metrics diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml new file mode 100644 index 0000000000..b8e92a3604 --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,30 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container' + - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + target_label: 'service' + regex: '(.+)' + - source_labels: ['__meta_docker_container_label_app'] + target_label: 'app' + regex: '(.+)' + pipeline_stages: + - docker: {} diff --git a/pulumi/.gitignore b/pulumi/.gitignore new file mode 100644 index 0000000000..439e8a21a6 --- /dev/null +++ b/pulumi/.gitignore @@ -0,0 +1,17 @@ +# Pulumi +Pulumi.*.yaml +!Pulumi.yaml + +# Python +__pycache__/ +*.py[cod] +.venv/ 
+venv/ +*.egg-info/ + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store diff --git a/pulumi/Pulumi.yaml b/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..1ed0e01dc6 --- /dev/null +++ b/pulumi/Pulumi.yaml @@ -0,0 +1,3 @@ +name: devops-lab04 +runtime: python +description: Lab 4 - VM on AWS (same as Terraform) diff --git a/pulumi/README.md b/pulumi/README.md new file mode 100644 index 0000000000..3aa1b2ac1e --- /dev/null +++ b/pulumi/README.md @@ -0,0 +1,46 @@ +# Pulumi — Lab 4 IaC + +Same infrastructure as Terraform: one EC2 VM on AWS (free tier) with SSH, HTTP, and port 5000. + +## Prerequisites + +- [Pulumi CLI](https://www.pulumi.com/docs/install/) +- Python 3.9+ +- AWS credentials (env or `~/.aws/credentials`) +- SSH public key at `~/.ssh/id_rsa.pub` (or set `ssh_public_key_path` in config) + +## Setup + +```bash +cd pulumi +python -m venv venv +source venv/bin/activate # or venv\Scripts\activate on Windows +pip install -r requirements.txt +``` + +## Config (optional) + +```bash +pulumi config set aws:region us-east-1 +pulumi config set project_name devops-lab04 +pulumi config set ssh_public_key_path ~/.ssh/id_rsa.pub +pulumi config set allowed_ssh_cidr "YOUR_IP/32" +``` + +## Deploy + +```bash +pulumi preview +pulumi up +pulumi stack output +``` + +## Cleanup + +```bash +pulumi destroy +``` + +## Note + +Destroy Terraform resources before running Pulumi (or use a different project_name/region) to avoid name conflicts (e.g. key pair name). 
diff --git a/pulumi/__main__.py b/pulumi/__main__.py new file mode 100644 index 0000000000..ecac7b308c --- /dev/null +++ b/pulumi/__main__.py @@ -0,0 +1,64 @@ +"""Lab 4 - Create VM on AWS with Pulumi (same as Terraform).""" +import os +import pulumi +import pulumi_aws as aws + +config = pulumi.Config() +project_name = config.get("project_name") or "devops-lab04" +instance_type = config.get("instance_type") or "t2.micro" +allowed_ssh_cidr = config.get("allowed_ssh_cidr") or "0.0.0.0/0" +ssh_public_key_path = config.get("ssh_public_key_path") or os.path.expanduser("~/.ssh/id_rsa.pub") + +with open(ssh_public_key_path) as f: + public_key_content = f.read() + +# Latest Ubuntu 22.04 LTS AMI +ami = aws.ec2.get_ami( + most_recent=True, + owners=["099720109477"], + filters=[ + aws.ec2.GetAmiFilterArgs(name="name", values=["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]), + aws.ec2.GetAmiFilterArgs(name="virtualization-type", values=["hvm"]), + ], +) + +# SSH key pair +key_pair = aws.ec2.KeyPair( + "vm-key", + key_name=f"{project_name}-key", + public_key=public_key_content, +) + +# Security group: SSH (22), HTTP (80), app (5000) +sg = aws.ec2.SecurityGroup( + "vm-sg", + name=f"{project_name}-sg", + description="Allow SSH, HTTP, and app port 5000", + ingress=[ + aws.ec2.SecurityGroupIngressArgs(protocol="tcp", from_port=22, to_port=22, cidr_blocks=[allowed_ssh_cidr], description="SSH"), + aws.ec2.SecurityGroupIngressArgs(protocol="tcp", from_port=80, to_port=80, cidr_blocks=["0.0.0.0/0"], description="HTTP"), + aws.ec2.SecurityGroupIngressArgs(protocol="tcp", from_port=5000, to_port=5000, cidr_blocks=["0.0.0.0/0"], description="App"), + ], + egress=[aws.ec2.SecurityGroupEgressArgs(protocol="-1", from_port=0, to_port=0, cidr_blocks=["0.0.0.0/0"])], + tags={"Name": f"{project_name}-sg"}, +) + +# EC2 instance (free tier: t2.micro) +instance = aws.ec2.Instance( + "vm", + ami=ami.id, + instance_type=instance_type, + key_name=key_pair.key_name, + 
vpc_security_group_ids=[sg.id], + associate_public_ip_address=True, + root_block_device=aws.ec2.InstanceRootBlockDeviceArgs(volume_size=8, volume_type="gp2"), + user_data="""#!/bin/bash +apt-get update -y +apt-get install -y python3 +""", + tags={"Name": f"{project_name}-vm"}, +) + +pulumi.export("public_ip", instance.public_ip) +pulumi.export("instance_id", instance.id) +pulumi.export("ssh_command", instance.public_ip.apply(lambda ip: f"ssh ubuntu@{ip}")) diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt new file mode 100644 index 0000000000..2b3f653eaf --- /dev/null +++ b/pulumi/requirements.txt @@ -0,0 +1,2 @@ +pulumi>=3.0.0 +pulumi-aws>=6.0.0 diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 0000000000..7672ebf3af --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1,23 @@ +# Terraform +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars +!terraform.tfvars.example + +# Credentials +*.pem +*.key +*.json +credentials +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Crash and debug +crash.log +crash.*.log +*.log diff --git a/terraform/.tflint.hcl b/terraform/.tflint.hcl new file mode 100644 index 0000000000..c1f91e49ed --- /dev/null +++ b/terraform/.tflint.hcl @@ -0,0 +1,7 @@ +plugin "terraform" { + enabled = true +} + +plugin "aws" { + enabled = true +} diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 0000000000..ef779d408c --- /dev/null +++ b/terraform/README.md @@ -0,0 +1,41 @@ +# Terraform — Lab 4 IaC + +Provisions a single EC2 VM on AWS (free tier) with SSH, HTTP, and port 5000 open. 
+ +## Prerequisites + +- [Terraform](https://developer.hashicorp.com/terraform/downloads) >= 1.9 +- AWS CLI configured, or env vars: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` +- SSH key pair (default: `~/.ssh/id_rsa.pub`) + +## Usage + +```bash +cd terraform + +# Copy example tfvars (optional) +cp terraform.tfvars.example terraform.tfvars +# Edit terraform.tfvars with your values (do not commit!) + +terraform init +terraform fmt +terraform validate +terraform plan +terraform apply +``` + +## Outputs + +- `public_ip` — VM public IP +- `ssh_command` — Example SSH command (user: `ubuntu`) + +## Cleanup + +```bash +terraform destroy +``` + +## Security + +- Restrict `allowed_ssh_cidr` to your IP (e.g. `"YOUR_IP/32"`). +- Never commit `terraform.tfvars` or `.tfstate`. diff --git a/terraform/docs/LAB04.md b/terraform/docs/LAB04.md new file mode 100644 index 0000000000..3c7be8a82b --- /dev/null +++ b/terraform/docs/LAB04.md @@ -0,0 +1,83 @@ +# LAB04 — Infrastructure as Code (Terraform & Pulumi) + +## 1. Cloud Provider & Infrastructure + +- **Cloud provider:** AWS + **Rationale:** Widely used, strong Terraform/Pulumi support, free tier (t2.micro, 750 hrs/month for 12 months). +- **Instance type/size:** `t2.micro` (1 vCPU, 1 GiB RAM) — AWS free tier. +- **Region/zone:** `us-east-1` (default; change via `aws_region` / `aws:region`). +- **Estimated cost:** $0 within free tier (ensure no other paid resources). +- **Resources created:** + - EC2 instance (Ubuntu 22.04 LTS) + - Security group (SSH 22, HTTP 80, app 5000) + - Key pair (from your SSH public key) + - Public IP (assigned by default to the instance) + +--- + +## 2. 
Terraform Implementation + +- **Terraform version:** 1.9+ +- **Project structure:** + - `main.tf` — provider, data source (AMI), key pair, security group, EC2 instance + - `variables.tf` — region, project name, instance type, SSH key path, allowed CIDR, tags + - `outputs.tf` — public IP, instance ID, SSH command + - `terraform.tfvars.example` — example variable values (copy to `terraform.tfvars`, gitignored) +- **Decisions:** Use default VPC; Ubuntu 22.04 AMI via data source; single security group for all rules. +- **Challenges:** (Document any you hit: e.g. AMI ownership, key path, region.) +- **Terminal output:** (Add your own sanitized output.) + - `terraform init` + - `terraform plan` (no secrets) + - `terraform apply` + - SSH connection (e.g. `ssh ubuntu@`) + +--- + +## 3. Pulumi Implementation + +- **Pulumi version:** 3.x | **Language:** Python +- **Differences from Terraform:** Same resources expressed in Python (imperative style); config via `pulumi config`; outputs via `pulumi.export()`. +- **Advantages:** Full Python (loops, conditionals, reuse); IDE support; encrypted secrets in Pulumi Cloud. +- **Challenges:** (Document any: e.g. SSH key path at startup, config vs Terraform variables.) +- **Terminal output:** (Add your own.) + - `pulumi preview` + - `pulumi up` + - SSH connection to Pulumi-created VM + +--- + +## 4. Terraform vs Pulumi Comparison + +| Aspect | Terraform | Pulumi | +|---------------|-----------------------------------|----------------------------------| +| **Ease of learning** | HCL is small and focused; good for simple infra. | Easier if you already know Python/TS. | +| **Readability** | Declarative; structure is clear. | Code can be more compact; logic is explicit. | +| **Debugging** | Plan/output and provider docs. | Stack traces and IDE help. | +| **Documentation** | Large community and registry. | Good docs; smaller ecosystem. | +| **Use case** | Standard choice for multi-cloud IaC, teams. 
| Strong when you want code reuse and tests. | + +**When to use Terraform:** Multi-cloud, team standardization, lots of examples and modules. +**When to use Pulumi:** Prefer coding in Python/TS, need loops/functions or testing in the same language. + +--- + +## 5. Lab 5 Preparation & Cleanup + +**VM for Lab 5:** +- [ ] Keeping VM for Lab 5? (Yes / No) +- [ ] If yes: Which one? (Terraform / Pulumi) +- [ ] If no: Plan for Lab 5? (Local VM / Recreate cloud VM later) + +**Cleanup status:** +- If keeping one VM: Note which tool created it and that the other stack has been destroyed. +- If destroying all: Run `terraform destroy` and `pulumi destroy`; add short terminal output (no secrets). +- Optional: Screenshot of cloud console showing no (or only intended) resources. + +--- + +## 6. Bonus: IaC CI/CD + +- **Workflow:** `.github/workflows/terraform-ci.yml` runs on changes to `terraform/**`. +- **Steps:** `terraform fmt -check`, `terraform init -backend=false`, `terraform validate`, `tflint --init` then `tflint`. +- **Path filters:** Only when `terraform/**` or the workflow file changes. +- A dummy SSH public key is created in CI so `file()` in Terraform does not fail during validate. 
diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000000..e9439e134f --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,105 @@ +terraform { + required_version = ">= 1.9.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.aws_region + + default_tags { + tags = var.tags + } +} + +# Latest Ubuntu 22.04 LTS AMI +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] # Canonical + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +# SSH key pair (uses existing public key) +resource "aws_key_pair" "vm" { + key_name = "${var.project_name}-key" + public_key = file(pathexpand(var.ssh_public_key_path)) +} + +# Security group: SSH (22), HTTP (80), app (5000) +resource "aws_security_group" "vm" { + name = "${var.project_name}-sg" + description = "Allow SSH, HTTP, and app port 5000" + + ingress { + description = "SSH" + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = [var.allowed_ssh_cidr] + } + + ingress { + description = "HTTP" + from_port = 80 + to_port = 80 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "App port 5000" + from_port = 5000 + to_port = 5000 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = { + Name = "${var.project_name}-sg" + } +} + +# EC2 instance (free tier: t2.micro) +resource "aws_instance" "vm" { + ami = data.aws_ami.ubuntu.id + instance_type = var.instance_type + key_name = aws_key_pair.vm.key_name + vpc_security_group_ids = [aws_security_group.vm.id] + associate_public_ip_address = true + + root_block_device { + volume_size = 8 + volume_type = "gp2" + } + + user_data = <<-EOT + #!/bin/bash + apt-get update -y + apt-get install -y python3 + EOT 
+ + tags = { + Name = "${var.project_name}-vm" + } +} diff --git a/terraform/outputs.tf b/terraform/outputs.tf new file mode 100644 index 0000000000..cf104d32ed --- /dev/null +++ b/terraform/outputs.tf @@ -0,0 +1,14 @@ +output "public_ip" { + description = "Public IP address of the VM" + value = aws_instance.vm.public_ip +} + +output "instance_id" { + description = "EC2 instance ID" + value = aws_instance.vm.id +} + +output "ssh_command" { + description = "SSH command to connect to the VM" + value = "ssh -i ~/.ssh/id_rsa ubuntu@${aws_instance.vm.public_ip}" +} diff --git a/terraform/terraform.tfvars.example b/terraform/terraform.tfvars.example new file mode 100644 index 0000000000..3ba69f4466 --- /dev/null +++ b/terraform/terraform.tfvars.example @@ -0,0 +1,6 @@ +# Copy to terraform.tfvars and fill in (terraform.tfvars is gitignored) +# aws_region = "us-east-1" +# project_name = "devops-lab04" +# instance_type = "t2.micro" +# ssh_public_key_path = "~/.ssh/id_rsa.pub" +# allowed_ssh_cidr = "YOUR_IP/32" # e.g. 
"203.0.113.42/32" diff --git a/terraform/variables.tf b/terraform/variables.tf new file mode 100644 index 0000000000..8639c79c04 --- /dev/null +++ b/terraform/variables.tf @@ -0,0 +1,38 @@ +variable "aws_region" { + description = "AWS region for resources" + type = string + default = "us-east-1" +} + +variable "project_name" { + description = "Project name used for resource naming and tags" + type = string + default = "devops-lab04" +} + +variable "instance_type" { + description = "EC2 instance type (use t2.micro for free tier)" + type = string + default = "t2.micro" +} + +variable "ssh_public_key_path" { + description = "Path to your SSH public key for VM access" + type = string + default = "~/.ssh/id_rsa.pub" +} + +variable "allowed_ssh_cidr" { + description = "CIDR block allowed for SSH (restrict to your IP for security)" + type = string + default = "0.0.0.0/0" # Replace with your IP/32 in production +} + +variable "tags" { + description = "Common tags for all resources" + type = map(string) + default = { + Project = "DevOps-Core-Course" + Lab = "lab04" + } +}