diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml
new file mode 100644
index 0000000000..99e29446c0
--- /dev/null
+++ b/.github/workflows/ansible-deploy.yml
@@ -0,0 +1,105 @@
+name: Ansible Deployment
+
+on:
+  push:
+    branches: [ master ]
+    paths:
+      - 'ansible/**'
+      - '!ansible/docs/**'
+      - '.github/workflows/ansible-deploy.yml'
+  pull_request:
+    branches: [ master ]
+    paths:
+      - 'ansible/**'
+      - '!ansible/docs/**'
+      - '.github/workflows/ansible-deploy.yml'
+
+jobs:
+  lint:
+    name: Ansible Lint
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          pip install ansible ansible-lint
+
+      # No "|| true": a lint failure must fail this job so that "needs: lint" gates the deploy.
+      - name: Run ansible-lint
+        run: |
+          cd ansible
+          ansible-lint playbooks/provision.yml playbooks/deploy.yml
+
+  deploy:
+    name: Deploy Application
+    needs: lint
+    # Deploy only on pushes to master; pull requests are linted but never touch the VM.
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install Ansible
+        run: pip install ansible
+
+      - name: Setup SSH
+        run: |
+          mkdir -p ~/.ssh
+          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
+          chmod 600 ~/.ssh/id_rsa
+          ssh-keyscan -H ${{ secrets.VM_HOST }} >> ~/.ssh/known_hosts
+
+      - name: Create vault password file
+        run: echo "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass
+
+      - name: Update inventory with VM host
+        run: |
+          cat > ansible/inventory/hosts.ini << 'INVENTORY'
+          [webservers]
+          lab04-vm ansible_host=${{ secrets.VM_HOST }} ansible_user=${{ secrets.VM_USER }} ansible_ssh_private_key_file=~/.ssh/id_rsa
+
+          [webservers:vars]
+          ansible_python_interpreter=/usr/bin/python3
+          INVENTORY
+
+      - name: Run Ansible playbook
+        run: |
+          cd ansible
+          ansible-playbook playbooks/deploy.yml \
+            --vault-password-file /tmp/vault_pass \
+            -e "dockerhub_username=nadiaa02" \
+            -e "dockerhub_password=${{ secrets.DOCKERHUB_TOKEN }}" \
+            -e "app_name=lab02-python-app" \
+            -e "docker_image=nadiaa02/lab02-python-app" \
+            -e "docker_image_tag=latest" \
+            -e "app_port=5000" \
+            -e "app_container_name=devops-app"
+
+      - name: Cleanup vault password
+        # Run even when the playbook step fails so the password file never lingers.
+        if: always()
+        run: rm -f /tmp/vault_pass
+
+      - name: Verify deployment
+        run: |
+          sleep 10
+          curl -f http://${{ secrets.VM_HOST }}:5000/ || exit 1
+          curl -f http://${{ secrets.VM_HOST }}:5000/health || exit 1
+
+      - name: Show deployment result
+        run: |
+          echo "Deployment successful!"
+          curl http://${{ secrets.VM_HOST }}:5000/health
diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
new file mode 100644
index 0000000000..0954a4b580
--- /dev/null
+++ b/.github/workflows/python-ci.yml
@@ -0,0 +1,126 @@
+name: Python CI/CD Pipeline
+
+on:
+  push:
+    branches: [ main, master, lab03 ]
+    paths:
+      - 'app_python/**'
+      - '.github/workflows/python-ci.yml'
+  pull_request:
+    branches: [ main, master ]
+    paths:
+      - 'app_python/**'
+  workflow_dispatch:
+
+env:
+  REGISTRY: docker.io
+  IMAGE_NAME: nadiaa02/devops-python-app
+  PYTHON_VERSION: '3.11'
+
+jobs:
+  test:
+    name: Lint & Test
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+          cache: 'pip'
+          cache-dependency-path: 'app_python/requirements*.txt'
+
+      - name: Cache pip packages
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('app_python/requirements*.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+
+      - name: Install dependencies
+        working-directory: ./app_python
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -r requirements-dev.txt
+
+      - name: Lint with flake8
+        working-directory: ./app_python
+        run: |
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          flake8 .
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + - name: Run tests with coverage + working-directory: ./app_python + run: | + pytest tests/ -v --cov=. --cov-report=xml --cov-report=term-missing + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + file: ./app_python/coverage.xml + flags: python + name: python-coverage + token: ${{ secrets.CODECOV_TOKEN }} + continue-on-error: true + + security: + name: Security Scan + runs-on: ubuntu-latest + needs: test + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run Snyk vulnerability scan + uses: snyk/actions/python@master + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + args: --file=app_python/requirements.txt --severity-threshold=high + continue-on-error: true + + docker: + name: Build & Push Docker Image + runs-on: ubuntu-latest + needs: [test, security] + if: github.event_name == 'push' && github.ref == 'refs/heads/lab03' + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Generate version + id: version + run: | + VERSION=$(date -u +'%Y.%m.%d-%H%M') + MONTH=$(date -u +'%Y.%m') + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "month=$MONTH" >> $GITHUB_OUTPUT + + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: ./app_python + file: ./app_python/Dockerfile + push: true + tags: | + ${{ env.IMAGE_NAME }}:${{ steps.version.outputs.version }} + ${{ env.IMAGE_NAME }}:${{ steps.version.outputs.month }} + ${{ env.IMAGE_NAME }}:latest + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.gitignore b/.gitignore index 30d74d2584..26b08a290f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,21 @@ -test \ No newline at end of file +test +# Terraform 
+*.tfstate
+*.tfstate.*
+.terraform/
+terraform.tfvars
+*.tfvars
+.terraform.lock.hcl
+
+# Pulumi
+pulumi/venv/
+Pulumi.*.yaml
+
+# Credentials
+key.json
+*.pem
+
+# Ansible
+*.retry
+.vault_pass
+__pycache__/
diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg
new file mode 100644
index 0000000000..c2704e06bb
--- /dev/null
+++ b/ansible/ansible.cfg
@@ -0,0 +1,14 @@
+[defaults]
+inventory = inventory/hosts.ini
+roles_path = roles
+host_key_checking = False
+remote_user = ubuntu
+retry_files_enabled = False
+# vault_password_file is a [defaults] option; under [privilege_escalation]
+# Ansible ignores it, so no vault password is picked up from this file.
+vault_password_file = .vault_pass
+
+[privilege_escalation]
+become = True
+become_method = sudo
+become_user = root
diff --git a/ansible/docs/LAB05.md b/ansible/docs/LAB05.md
new file mode 100644
index 0000000000..9c1762e796
--- /dev/null
+++ b/ansible/docs/LAB05.md
@@ -0,0 +1,149 @@
+# Lab 05 — Ansible Fundamentals
+
+## 1. Architecture Overview
+
+- **Ansible version:** 2.20.5 (core)
+- **Target VM:** Ubuntu 24.04 LTS (93.77.181.6, from Lab 4 Pulumi)
+- **Control node:** macOS (local machine)
+
+### Role Structure:
+ansible/
+├── inventory/hosts.ini # Static inventory with VM IP
+├── roles/
+│   ├── common/ # System packages
+│   ├── docker/ # Docker installation
+│   └── app_deploy/ # Application deployment
+├── playbooks/
+│   ├── provision.yml # Runs common + docker roles
+│   └── deploy.yml # Runs app_deploy role
+├── group_vars/all.yml # Encrypted credentials (Vault)
+└── ansible.cfg # Ansible configuration
+
+**Why roles instead of monolithic playbooks?**
+Roles separate concerns — each role does one thing. This makes code reusable, testable, and easy to maintain. A monolithic playbook becomes hard to read and impossible to reuse across projects.
+
+---
+
+## 2. Roles Documentation
+
+### common role
+- **Purpose:** Updates apt cache and installs essential system packages
+- **Variables:** `common_packages` — list of packages to install (python3-pip, curl, git, vim, htop, etc.)
+- **Handlers:** None +- **Dependencies:** None + +### docker role +- **Purpose:** Installs Docker CE on the target VM, ensures service is running, adds user to docker group +- **Variables:** `docker_user` — user to add to docker group (default: ubuntu) +- **Handlers:** `restart docker` — restarts Docker service when triggered by package installation +- **Dependencies:** common (apt cache must be updated) + +### app_deploy role +- **Purpose:** Logs into Docker Hub, pulls image, stops old container, runs new container, verifies health +- **Variables:** + - `app_port: 5000` + - `app_restart_policy: unless-stopped` + - `dockerhub_username, dockerhub_password` — from Vault + - `docker_image, docker_image_tag, app_container_name` — from Vault +- **Handlers:** `restart app` — restarts application container +- **Dependencies:** docker role must be applied first + +--- + +## 3. Idempotency Demonstration + +### First run output: +TASK [common : Update apt cache] .............. changed +TASK [common : Install common packages] ....... changed +TASK [docker : Add Docker GPG key] ............ changed +TASK [docker : Add Docker repository] ......... changed +TASK [docker : Install Docker packages] ....... changed +TASK [docker : Add user to docker group] ...... changed +TASK [docker : Install python3-docker] ........ changed +RUNNING HANDLER [docker : restart docker] ..... changed +PLAY RECAP +lab04-vm: ok=12 changed=8 unreachable=0 failed=0 + +### Second run output: +TASK [common : Update apt cache] .............. ok +TASK [common : Install common packages] ....... ok +TASK [docker : Add Docker GPG key] ............ ok +TASK [docker : Add Docker repository] ......... ok +TASK [docker : Install Docker packages] ....... ok +TASK [docker : Add user to docker group] ...... ok +TASK [docker : Install python3-docker] ........ 
ok +PLAY RECAP +lab04-vm: ok=11 changed=0 unreachable=0 failed=0 + +**Analysis:** First run made 8 changes — installed packages, added GPG key, added repository, added user to group, restarted Docker. Second run showed 0 changes because all desired states were already achieved. Tasks are idempotent because Ansible modules (apt, apt_key, apt_repository, user, service) check current state before acting. + +--- + +## 4. Ansible Vault Usage + +Credentials stored in `group_vars/all.yml`, encrypted with Ansible Vault: +$ANSIBLE_VAULT;1.1;AES256 +62633836313162393136646664616231633635383338... + +- Vault password stored in `.vault_pass` (added to .gitignore) +- File encrypted with `ansible-vault encrypt` +- Viewed with `ansible-vault view group_vars/all.yml` +- Edited with `ansible-vault edit group_vars/all.yml` + +**Why Ansible Vault is important:** Credentials must never be stored in plaintext in version control. Vault encrypts secrets so they can be safely committed to Git while remaining inaccessible without the vault password. + +--- + +## 5. Deployment Verification + +### deploy.yml run output: +TASK [app_deploy : Log in to Docker Hub] ...... changed +TASK [app_deploy : Pull Docker image] ......... changed +TASK [app_deploy : Stop existing container] ... changed +TASK [app_deploy : Remove existing container] . changed +TASK [app_deploy : Run application container] . changed +TASK [app_deploy : Wait for application] ...... ok +TASK [app_deploy : Verify application health] . ok +TASK [app_deploy : Show health check result] .. 
ok +msg: "App is running, status: 200" +PLAY RECAP +lab04-vm: ok=9 changed=5 unreachable=0 failed=0 + +### Container status (docker ps): +CONTAINER ID IMAGE COMMAND +5a4b4b29701f nadiaa02/lab02-python-app:latest "python app.py" +STATUS: Up PORTS: 0.0.0.0:5000->5000/tcp NAMES: devops-app + +### Health check: +```bash +$ curl http://93.77.181.6:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0"},"runtime":{"uptime_human":"0 hours, 1 minute"},...} + +HTTP Status: 200 OK +``` + +--- + +## 6. Key Decisions + +**Why use roles instead of plain playbooks?** +Roles enforce separation of concerns and make code reusable. The docker role can be used in any project that needs Docker, without copying tasks. Plain playbooks become monolithic and hard to maintain. + +**How do roles improve reusability?** +Each role is self-contained with its own tasks, handlers, and defaults. Any playbook can include a role with one line. Roles can also be shared via Ansible Galaxy. + +**What makes a task idempotent?** +A task is idempotent when it checks current state before acting. Ansible modules like `apt`, `service`, and `user` do this automatically — they only make changes when the current state differs from the desired state. + +**How do handlers improve efficiency?** +Handlers only run when notified, and only once per play even if notified multiple times. This prevents unnecessary service restarts — Docker is only restarted if packages actually changed. + +**Why is Ansible Vault necessary?** +Plaintext credentials in Git are a critical security risk. Vault encrypts secrets at rest so they can be version-controlled safely. The vault password is kept separate and never committed. + +--- + +## 7. 
Challenges + +- Ansible 2.20 has a bug where group_vars vault variables are not resolved in task args — worked around by defining vars directly in playbook +- Docker GPG key deprecation warning in apt_key module — cosmetic only, does not affect functionality diff --git a/ansible/docs/LAB06.md b/ansible/docs/LAB06.md new file mode 100644 index 0000000000..602454a0dc --- /dev/null +++ b/ansible/docs/LAB06.md @@ -0,0 +1,269 @@ +# Lab 06 — Advanced Ansible & CI/CD + + +--- + +## Task 1: Blocks & Tags (2 pts) + +### Block usage in roles + +Both `common` and `docker` roles refactored with block/rescue/always structure. + +**common role** — one block with tags `packages`, `common`: +- block: update apt cache + install packages +- rescue: fix-missing retry on apt failure +- always: debug log completion + +**docker role** — two blocks: +- Install block (tags: `docker`, `docker_install`): GPG key, repo, packages +- Config block (tags: `docker`, `docker_config`): service, user, python3-docker +- rescue: wait 10s and retry GPG key on network failure +- always: ensure Docker service enabled + +### Tag strategy + +| Tag | What runs | +|-----|-----------| +| `common` | entire common role | +| `packages` | package installation only | +| `docker` | entire docker role | +| `docker_install` | Docker installation only | +| `docker_config` | Docker configuration only | +| `app_deploy` | application deployment | +| `compose` | Docker Compose tasks | +| `web_app_wipe` | wipe logic only | + +### Available tags output: +playbook: playbooks/provision.yml +play #1 (webservers): Provision web servers TAGS: [] +TASK TAGS: [common, docker, docker_config, docker_install, packages] + +### Selective execution with --tags "docker": +TASK [docker : Install required system packages] ok +TASK [docker : Create directory for Docker GPG key] ok +TASK [docker : Add Docker GPG key] ok +TASK [docker : Add Docker repository] ok +TASK [docker : Install Docker packages] ok +TASK [docker : Ensure Docker service 
is running and enabled] ok +TASK [docker : Add user to docker group] ok +TASK [docker : Install python3-docker] ok +TASK [docker : Ensure Docker service is enabled] ok +PLAY RECAP: ok=10 changed=0 unreachable=0 failed=0 + +Common role skipped entirely — only docker tasks ran. + +--- + +## Task 2: Docker Compose Migration (3 pts) + +### Role renamed: app_deploy → web_app + +### Template: roles/web_app/templates/docker-compose.yml.j2 +```yaml +services: + {{ app_name }}: + image: {{ docker_image }}:{{ docker_image_tag }} + container_name: {{ app_container_name }} + ports: + - "{{ app_port }}:{{ app_port }}" + restart: {{ app_restart_policy }} + environment: + - APP_ENV=production +``` + +### Role dependency (meta/main.yml): +```yaml +dependencies: + - role: docker +``` + +Docker role runs automatically before web_app. + +### Before (Lab 5): docker run via shell +```bash +docker run -d --name devops-app --restart unless-stopped -p 5000:5000 image:tag +``` + +### After (Lab 6): Docker Compose +```bash +docker compose -f /opt/devops-app/docker-compose.yml up -d --remove-orphans +``` + +### First deployment output: +TASK [web_app : Create application directory] changed +TASK [web_app : Template docker-compose file] changed +TASK [web_app : Login to Docker Hub] changed +TASK [web_app : Pull latest Docker image] changed +TASK [web_app : Deploy with Docker Compose] changed +TASK [web_app : Wait for application to be ready] ok +TASK [web_app : Verify application health] ok +TASK [web_app : Show health check result] ok: "App is running, status: 200" +PLAY RECAP: ok=19 changed=4 failed=0 + +### Second deployment (idempotency): +TASK [web_app : Create application directory] ok +TASK [web_app : Template docker-compose file] ok +TASK [web_app : Login to Docker Hub] changed +TASK [web_app : Pull latest Docker image] changed +TASK [web_app : Deploy with Docker Compose] changed +PLAY RECAP: ok=19 changed=3 failed=0 + +Directory and template are idempotent (ok). 
Docker login/pull/compose use shell module so always show changed — expected behavior for shell commands. + +### Templated docker-compose.yml rendered on VM: +```yaml +services: + lab02-python-app: + image: nadiaa02/lab02-python-app:latest + container_name: devops-app + ports: + - "5000:5000" + restart: unless-stopped + environment: + - APP_ENV=production +``` + +### Application verification: +curl http://93.77.181.6:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0"},...} +HTTP Status: 200 OK + +--- + +## Task 3: Wipe Logic (1 pt) + +### Implementation + +**roles/web_app/defaults/main.yml:** +```yaml +web_app_wipe: false # Default: do not wipe +``` + +**roles/web_app/tasks/wipe.yml:** +- Double gate: when: web_app_wipe | bool + tag web_app_wipe +- Runs docker compose down then removes directory +- Uses ignore_errors: yes for already-clean state + +### Why both variable AND tag? +Variable alone could accidentally wipe if wrong vars passed. Tag alone could accidentally wipe if someone runs all tags. Both together require explicit intent — must set variable AND specify tag simultaneously. + +### What is the difference between never tag and this approach? +The never tag permanently prevents a task from running unless explicitly called — it is a hard block built into Ansible. Our approach uses a when condition which is dynamic and can be overridden per-run with -e flag. More flexible for CI/CD pipelines. + +### Why must wipe logic come BEFORE deployment? +To support clean reinstallation. Flow: remove old state then install new. If reversed, you would deploy fresh and immediately wipe it. + +### Test Results + +**Scenario 1 — Normal deployment (wipe skipped):** +```bash +ansible-playbook playbooks/deploy.yml +``` +TASK [web_app : Stop and remove containers] skipping +TASK [web_app : Remove application directory] skipping +TASK [web_app : Log wipe completion] skipping +PLAY RECAP: changed=4 failed=0 +Result: Wipe tasks skipped, app deployed normally. 
+ +**Scenario 2 — Wipe only:** +```bash +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe +``` +TASK [web_app : Stop and remove containers] changed +TASK [web_app : Remove application directory] changed +TASK [web_app : Log wipe completion] ok: "Application lab02-python-app wiped successfully" +PLAY RECAP: ok=5 changed=2 failed=0 +Result: Only wipe ran, deployment skipped entirely. + +**Scenario 3 — Clean reinstall (wipe then deploy):** +```bash +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" +``` +TASK [web_app : Stop and remove containers] changed (ignored error - already clean) +TASK [web_app : Remove application directory] ok +TASK [web_app : Log wipe completion] ok +TASK [web_app : Create application directory] changed +TASK [web_app : Deploy with Docker Compose] changed +TASK [web_app : Show health check result] ok: "App is running, status: 200" +PLAY RECAP: ok=22 changed=6 ignored=1 +Result: Old app removed, new app deployed fresh and verified. + +**Scenario 4 — Safety check (tag set but variable false):** +```bash +ansible-playbook playbooks/deploy.yml --tags web_app_wipe +``` +TASK [web_app : Stop and remove containers] skipping +TASK [web_app : Remove application directory] skipping +TASK [web_app : Log wipe completion] skipping +PLAY RECAP: ok=2 changed=0 skipped=3 +Result: when: web_app_wipe | bool blocked execution because variable was false. 
+ +--- + +## Task 4: CI/CD with GitHub Actions (3 pts) + +### Workflow file: .github/workflows/ansible-deploy.yml + +**Triggers:** +- Push to master branch with changes in ansible/ directory +- Pull request to master with changes in ansible/ directory +- Excludes ansible/docs/ changes (no deploy needed for docs) + +**Jobs:** + +**lint job:** +- Installs ansible and ansible-lint +- Runs ansible-lint on provision.yml and deploy.yml +- Must pass before deploy job runs + +**deploy job (needs: lint):** +- Sets up Python 3.12 and Ansible +- Configures SSH using SSH_PRIVATE_KEY secret +- Creates vault password file from ANSIBLE_VAULT_PASSWORD secret +- Updates inventory with VM_HOST and VM_USER from secrets +- Runs ansible-playbook deploy.yml +- Cleans up vault password file +- Verifies deployment with curl to port 5000 + +**GitHub Secrets configured:** +- ANSIBLE_VAULT_PASSWORD — vault password for decryption +- SSH_PRIVATE_KEY — private key for VM access +- VM_HOST — target VM IP address (93.77.181.6) +- VM_USER — SSH username (ubuntu) + +**Path filters:** +```yaml +paths: + - 'ansible/**' + - '!ansible/docs/**' + - '.github/workflows/ansible-deploy.yml' +``` +Only runs when Ansible code changes, not on docs updates. + +--- + +## Key Decisions + +**Why use roles instead of plain playbooks?** +Roles enforce separation of concerns and make code reusable. The docker role can be used in any project. Plain playbooks become monolithic and hard to maintain as complexity grows. + +**What makes a task idempotent?** +A task is idempotent when it checks current state before acting and only makes changes when needed. Ansible modules like apt, service, file, template do this automatically. Shell commands do not. + +**How do handlers improve efficiency?** +Handlers only run once at end of play even if notified multiple times. Docker restart handler will not fire repeatedly if multiple tasks notify it. 
+
+**When would you want clean reinstallation vs rolling update?**
+Use a clean reinstall for major version changes, corrupted state, or changes to the configuration structure. Use a rolling update for minor patches that must be applied with zero downtime.
+
+**How would you extend wipe to include Docker images and volumes?**
+Add tasks that run `docker rmi` for the image and `docker volume rm` for named volumes. Gate them behind a `prune_images` boolean variable that defaults to false so the extra cleanup stays optional.
+
+**Security implications of storing SSH keys in GitHub Secrets?**
+Secrets are encrypted at rest and are exposed only to workflow runs in this repository; they are not passed to pull requests from forks. Repository-level secrets are not restricted by branch — use environment secrets with protection rules when that is required. Risk remains if the repository is compromised or if secrets are accidentally printed in logs. Mitigate with `no_log` and careful output handling.
+
+**How would you implement staging to production pipeline?**
+Add two environments in GitHub Actions (staging, production). Deploy to staging first, run integration tests, then require manual approval before deploying to production using environment protection rules.
+
+**What would you add for rollbacks?**
+Tag Docker images with the git commit SHA instead of `latest`. Keep the previous image tag in an Ansible variable. On failure, redeploy the previous tag automatically from the rescue block.
diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml new file mode 100644 index 0000000000..166dc49abd --- /dev/null +++ b/ansible/group_vars/all.yml @@ -0,0 +1,17 @@ +$ANSIBLE_VAULT;1.1;AES256 +66633034636538343337646632643063383135383963363564656665386533363866626537393030 +6333613861343830343061373764313732393634643336640a353763636532623432373163323462 +63343735303366323664663830666164633934653732373562353238326363333232303930383135 +3231386465323164380a313534663865613062623266303261353963383936333338383362626665 +32316166656463636636383032643335366334396136663430643162386432383736386261326463 +65323836616566633935393166633437333433336637393137363661656335633131353638613062 +63633866653732643937303634666566393930373136373861633935383962376464303264663237 +31613364666362323839396430623136653936663066333562373533383162383134366139353331 +37333463353530346365363062303662363536316161353366653230633764393561613238323661 +65653736363437323738373739303535616538623665323935323664656165653330636563386465 +63653332303934656466396434373166643236313465306136386139386665316130646231663735 +38323533653034623864336438623763303135313536663461343836356161613339353435326263 +36383035613364636236616438303035616662613238616331363837663333316361613637636234 +38376665396163663239316432373933666238363464316232626166646165303661643435343366 +38626466313437616434653663386261393937333435306137356133623161363031626630376533 +61313666343631643433 diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..ae6f2e504a --- /dev/null +++ b/ansible/inventory/hosts.ini @@ -0,0 +1,5 @@ +[webservers] +lab04-vm ansible_host=93.77.181.6 ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/lab04_key + +[webservers:vars] +ansible_python_interpreter=/usr/bin/python3 diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..9a85a4796c --- /dev/null +++ b/ansible/playbooks/deploy.yml 
@@ -0,0 +1,19 @@
+---
+- name: Deploy application
+  hosts: webservers
+  become: yes
+  vars:
+    dockerhub_username: nadiaa02
+    # "${{ secrets.* }}" is GitHub Actions syntax; Ansible would render it as
+    # Jinja and fail with an undefined-variable error for "secrets". Read the
+    # token from the control node's environment instead; the value passed with
+    # "-e dockerhub_password=..." by the CI workflow still takes precedence.
+    dockerhub_password: "{{ lookup('ansible.builtin.env', 'DOCKERHUB_TOKEN') }}"
+    app_name: lab02-python-app
+    docker_image: nadiaa02/lab02-python-app
+    docker_image_tag: latest
+    app_port: 5000
+    app_container_name: devops-app
+
+  roles:
+    - web_app
diff --git a/ansible/playbooks/provision.yml b/ansible/playbooks/provision.yml
new file mode 100644
index 0000000000..f53efb0248
--- /dev/null
+++ b/ansible/playbooks/provision.yml
@@ -0,0 +1,8 @@
+---
+- name: Provision web servers
+  hosts: webservers
+  become: yes
+
+  roles:
+    - common
+    - docker
diff --git a/ansible/roles/app_deploy/defaults/main.yml b/ansible/roles/app_deploy/defaults/main.yml
new file mode 100644
index 0000000000..9ffffde25e
--- /dev/null
+++ b/ansible/roles/app_deploy/defaults/main.yml
@@ -0,0 +1,4 @@
+---
+app_port: 5000
+app_restart_policy: unless-stopped
+app_env_vars: {}
diff --git a/ansible/roles/app_deploy/handlers/main.yml b/ansible/roles/app_deploy/handlers/main.yml
new file mode 100644
index 0000000000..e89f4ac261
--- /dev/null
+++ b/ansible/roles/app_deploy/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+- name: restart app
+  community.docker.docker_container:
+    name: "{{ app_container_name }}"
+    state: started
+    restart: yes
diff --git a/ansible/roles/app_deploy/tasks/main.yml b/ansible/roles/app_deploy/tasks/main.yml
new file mode 100644
index 0000000000..573cab813d
--- /dev/null
+++ b/ansible/roles/app_deploy/tasks/main.yml
@@ -0,0 +1,39 @@
+---
+- name: Log in to Docker Hub
+  shell: docker login -u "{{ dockerhub_username }}" -p "{{ dockerhub_password }}"
+  # Keep the rendered command (which contains the token) out of logs and CI output.
+  no_log: true
+
+- name: Pull Docker image
+  shell: docker pull "{{ docker_image }}:{{ docker_image_tag }}"
+
+- name: Stop existing container if running
+  shell: docker stop "{{ app_container_name }}" || true
+
+- name: Remove existing container if exists
+  shell: docker rm "{{ app_container_name }}" || true
+
+- name: Run application container
+  shell: >
+
docker run -d + --name "{{ app_container_name }}" + --restart "{{ app_restart_policy }}" + -p "{{ app_port }}:{{ app_port }}" + "{{ docker_image }}:{{ docker_image_tag }}" + +- name: Wait for application to be ready + wait_for: + host: localhost + port: "{{ app_port }}" + delay: 5 + timeout: 60 + +- name: Verify application health + uri: + url: "http://localhost:{{ app_port }}/" + return_content: yes + register: health_check + +- name: Show health check result + debug: + msg: "App is running, status: {{ health_check.status }}" diff --git a/ansible/roles/common/defaults/main.yml b/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..bfe93aa32f --- /dev/null +++ b/ansible/roles/common/defaults/main.yml @@ -0,0 +1,10 @@ +--- +common_packages: + - python3-pip + - curl + - git + - vim + - htop + - ca-certificates + - gnupg + - apt-transport-https diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..f8a6345cbf --- /dev/null +++ b/ansible/roles/common/tasks/main.yml @@ -0,0 +1,33 @@ +--- +- name: Install common packages + block: + - name: Update apt cache + apt: + update_cache: yes + cache_valid_time: 3600 + + - name: Install common packages + apt: + name: "{{ common_packages }}" + state: present + + rescue: + - name: Fix and retry apt cache update + apt: + update_cache: yes + force_apt_get: yes + + - name: Retry install common packages + apt: + name: "{{ common_packages }}" + state: present + + always: + - name: Log common role completion + ansible.builtin.debug: + msg: "Common role completed on {{ inventory_hostname }}" + + become: true + tags: + - packages + - common diff --git a/ansible/roles/docker/defaults/main.yml b/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..e64d3b7e66 --- /dev/null +++ b/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,2 @@ +--- +docker_user: ubuntu diff --git a/ansible/roles/docker/handlers/main.yml 
b/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..3627303e6b --- /dev/null +++ b/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart docker + service: + name: docker + state: restarted diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..682869ca10 --- /dev/null +++ b/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,81 @@ +--- +- name: Install Docker + block: + - name: Install required system packages + apt: + name: + - ca-certificates + - curl + - gnupg + state: present + + - name: Create directory for Docker GPG key + file: + path: /etc/apt/keyrings + state: directory + mode: '0755' + + - name: Add Docker GPG key + apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + + - name: Add Docker repository + apt_repository: + repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + state: present + + - name: Install Docker packages + apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + state: present + update_cache: yes + notify: restart docker + + rescue: + - name: Wait and retry Docker GPG key + ansible.builtin.pause: + seconds: 10 + + - name: Retry Add Docker GPG key + apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + + tags: + - docker + - docker_install + +- name: Configure Docker + block: + - name: Ensure Docker service is running and enabled + service: + name: docker + state: started + enabled: yes + + - name: Add user to docker group + user: + name: "{{ docker_user }}" + groups: docker + append: yes + + - name: Install python3-docker + apt: + name: python3-docker + state: present + + always: + - name: Ensure Docker service is enabled + service: + name: docker + enabled: yes + ignore_errors: yes + + become: true + tags: + - docker + - docker_config diff --git a/ansible/roles/web_app/defaults/main.yml 
b/ansible/roles/web_app/defaults/main.yml
new file mode 100644
index 0000000000..ee938e0a64
--- /dev/null
+++ b/ansible/roles/web_app/defaults/main.yml
@@ -0,0 +1,6 @@
+---
+app_port: 5000
+app_restart_policy: unless-stopped
+app_env_vars: {}
+compose_project_dir: "/opt/{{ app_container_name }}"
+web_app_wipe: false
diff --git a/ansible/roles/web_app/handlers/main.yml b/ansible/roles/web_app/handlers/main.yml
new file mode 100644
index 0000000000..e89f4ac261
--- /dev/null
+++ b/ansible/roles/web_app/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+- name: restart app
+  community.docker.docker_container:
+    name: "{{ app_container_name }}"
+    state: started
+    restart: yes
diff --git a/ansible/roles/web_app/meta/main.yml b/ansible/roles/web_app/meta/main.yml
new file mode 100644
index 0000000000..cb7d8e0460
--- /dev/null
+++ b/ansible/roles/web_app/meta/main.yml
@@ -0,0 +1,3 @@
+---
+dependencies:
+  - role: docker
diff --git a/ansible/roles/web_app/tasks/main.yml b/ansible/roles/web_app/tasks/main.yml
new file mode 100644
index 0000000000..259630ef14
--- /dev/null
+++ b/ansible/roles/web_app/tasks/main.yml
@@ -0,0 +1,59 @@
+---
+- name: Include wipe tasks
+  include_tasks: wipe.yml
+  tags:
+    - web_app_wipe
+
+- name: Deploy application with Docker Compose
+  block:
+    - name: Create application directory
+      file:
+        path: "{{ compose_project_dir }}"
+        state: directory
+        mode: '0755'
+
+    - name: Template docker-compose file
+      template:
+        src: docker-compose.yml.j2
+        dest: "{{ compose_project_dir }}/docker-compose.yml"
+        mode: '0644'
+
+    - name: Login to Docker Hub
+      shell: docker login -u "{{ dockerhub_username }}" -p "{{ dockerhub_password }}"
+      # Keep the rendered command (which contains the token) out of logs and CI output.
+      no_log: true
+
+    - name: Pull latest Docker image
+      shell: docker pull "{{ docker_image }}:{{ docker_image_tag }}"
+
+    - name: Deploy with Docker Compose
+      shell: docker compose -f "{{ compose_project_dir }}/docker-compose.yml" up -d --remove-orphans
+
+    - name: Wait for application to be ready
+      wait_for:
+        host: localhost
+        port: "{{ app_port }}"
+        delay: 5
+        timeout: 60
+
+    - name: Verify application health
+      uri:
+        url: "http://localhost:{{ app_port }}/"
+        return_content: yes
+      register: health_check
+
+    - name: Show health check result
+      debug:
+        msg: "App is running, status: {{ health_check.status }}"
+
+  rescue:
+    # Fail explicitly: a rescue section that completes successfully would
+    # otherwise let the play finish as if the deployment had worked.
+    - name: Handle deployment failure
+      ansible.builtin.fail:
+        msg: "Deployment failed for {{ app_name }}"
+
+  become: true
+  tags:
+    - app_deploy
+    - compose
diff --git a/ansible/roles/web_app/tasks/wipe.yml b/ansible/roles/web_app/tasks/wipe.yml
new file mode 100644
index 0000000000..bf902f0af0
--- /dev/null
+++ b/ansible/roles/web_app/tasks/wipe.yml
@@ -0,0 +1,20 @@
+---
+- name: Wipe web application
+  block:
+    - name: Stop and remove containers
+      shell: docker compose -f "{{ compose_project_dir }}/docker-compose.yml" down
+      ignore_errors: yes
+
+    - name: Remove application directory
+      file:
+        path: "{{ compose_project_dir }}"
+        state: absent
+
+    - name: Log wipe completion
+      debug:
+        msg: "Application {{ app_name }} wiped successfully"
+
+  when: web_app_wipe | bool
+  become: true
+  tags:
+    - web_app_wipe
diff --git a/ansible/roles/web_app/templates/docker-compose.yml.j2 b/ansible/roles/web_app/templates/docker-compose.yml.j2
new file mode 100644
index 0000000000..5b5b0e1096
--- /dev/null
+++ b/ansible/roles/web_app/templates/docker-compose.yml.j2
@@ -0,0 +1,9 @@
+services:
+  {{ app_name }}:
+    image: {{ docker_image }}:{{ docker_image_tag }}
+    container_name: {{ app_container_name }}
+    ports:
+      - "{{ app_port }}:{{ app_port }}"
+    restart: {{ app_restart_policy }}
+    environment:
+      - APP_ENV=production
diff --git a/app_python/Dockerfile b/app_python/Dockerfile
new file mode 100644
index 0000000000..07b74a6b6a
--- /dev/null
+++ b/app_python/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+ +ENV FLASK_APP=app.py +ENV FLASK_ENV=production + +EXPOSE 5000 + +CMD ["python", "app.py"] \ No newline at end of file diff --git a/app_python/README.md b/app_python/README.md new file mode 100644 index 0000000000..f90d410ae4 --- /dev/null +++ b/app_python/README.md @@ -0,0 +1,33 @@ +# DevOps Python Application + +![Python CI/CD Pipeline](https://github.com/nadiaa02/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg?branch=lab03) + +## Prerequisites +- Python 3.11+ + +## Installation +`bash +pip install -r requirements.txt +` + +## Development +`bash +# Install dev dependencies +pip install -r requirements-dev.txt + +# Run tests +pytest tests/ -v --cov=. +` + +## Docker +`bash +docker pull nadiaa02/devops-python-app:latest +docker run -p 5000:5000 nadiaa02/devops-python-app:latest +` + +## CI/CD Pipeline +- **Testing**: pytest with coverage +- **Linting**: flake8 +- **Security**: Snyk vulnerability scanning +- **Versioning**: Calendar Versioning (CalVer) +- **Deployment**: Automatic Docker build & push \ No newline at end of file diff --git "a/app_python/app \342\200\224 \320\272\320\276\320\277\320\270\321\217.py" "b/app_python/app \342\200\224 \320\272\320\276\320\277\320\270\321\217.py" new file mode 100644 index 0000000000..65f956bee7 --- /dev/null +++ "b/app_python/app \342\200\224 \320\272\320\276\320\277\320\270\321\217.py" @@ -0,0 +1,85 @@ +import os +import socket +import platform +import logging +from datetime import datetime, timezone +from flask import Flask, jsonify, request + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +app = Flask(__name__) + +HOST = os.getenv('HOST', '0.0.0.0') +PORT = int(os.getenv('PORT', 5000)) +START_TIME = datetime.now(timezone.utc) + +def get_system_info(): + return { + 'hostname': socket.gethostname(), + 'platform': platform.system(), + 'platform_version': platform.release(), + 'architecture': platform.machine(), + 
'cpu_count': os.cpu_count(), + 'python_version': platform.python_version() + } + +def get_uptime(): + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + hours, remainder = divmod(seconds, 3600) + minutes, _ = divmod(remainder, 60) + return { + 'seconds': seconds, + 'human': f"{hours} hour{'s' if hours != 1 else ''}, {minutes} minute{'s' if minutes != 1 else ''}" + } + +@app.route('/', methods=['GET']) +def index(): + logger.info(f"Request: {request.method} {request.path} from {request.remote_addr}") + uptime = get_uptime() + return jsonify({ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask" + }, + "system": get_system_info(), + "runtime": { + "uptime_seconds": uptime['seconds'], + "uptime_human": uptime['human'], + "current_time": datetime.now(timezone.utc).isoformat(), + "timezone": "UTC" + }, + "request": { + "client_ip": request.remote_addr or 'unknown', + "user_agent": request.headers.get('User-Agent', 'unknown'), + "method": request.method, + "path": request.path + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] + }) + +@app.route('/health', methods=['GET']) +def health(): + return jsonify({ + 'status': 'healthy', + 'timestamp': datetime.now(timezone.utc).isoformat(), + 'uptime_seconds': get_uptime()['seconds'] + }) + +@app.errorhandler(404) +def not_found(error): + return jsonify({'error': 'Not Found', 'message': 'Endpoint does not exist'}), 404 + +@app.errorhandler(500) +def internal_error(error): + return jsonify({'error': 'Internal Server Error', 'message': 'An unexpected error occurred'}), 500 + +if __name__ == '__main__': + logger.info('Application starting...') + app.run(host=HOST, port=PORT, debug=False) diff --git a/app_python/app.py b/app_python/app.py new file mode 100644 index 0000000000..65f956bee7 --- 
/dev/null +++ b/app_python/app.py @@ -0,0 +1,85 @@ +import os +import socket +import platform +import logging +from datetime import datetime, timezone +from flask import Flask, jsonify, request + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +app = Flask(__name__) + +HOST = os.getenv('HOST', '0.0.0.0') +PORT = int(os.getenv('PORT', 5000)) +START_TIME = datetime.now(timezone.utc) + +def get_system_info(): + return { + 'hostname': socket.gethostname(), + 'platform': platform.system(), + 'platform_version': platform.release(), + 'architecture': platform.machine(), + 'cpu_count': os.cpu_count(), + 'python_version': platform.python_version() + } + +def get_uptime(): + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + hours, remainder = divmod(seconds, 3600) + minutes, _ = divmod(remainder, 60) + return { + 'seconds': seconds, + 'human': f"{hours} hour{'s' if hours != 1 else ''}, {minutes} minute{'s' if minutes != 1 else ''}" + } + +@app.route('/', methods=['GET']) +def index(): + logger.info(f"Request: {request.method} {request.path} from {request.remote_addr}") + uptime = get_uptime() + return jsonify({ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask" + }, + "system": get_system_info(), + "runtime": { + "uptime_seconds": uptime['seconds'], + "uptime_human": uptime['human'], + "current_time": datetime.now(timezone.utc).isoformat(), + "timezone": "UTC" + }, + "request": { + "client_ip": request.remote_addr or 'unknown', + "user_agent": request.headers.get('User-Agent', 'unknown'), + "method": request.method, + "path": request.path + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] + }) + +@app.route('/health', methods=['GET']) +def 
health(): + return jsonify({ + 'status': 'healthy', + 'timestamp': datetime.now(timezone.utc).isoformat(), + 'uptime_seconds': get_uptime()['seconds'] + }) + +@app.errorhandler(404) +def not_found(error): + return jsonify({'error': 'Not Found', 'message': 'Endpoint does not exist'}), 404 + +@app.errorhandler(500) +def internal_error(error): + return jsonify({'error': 'Internal Server Error', 'message': 'An unexpected error occurred'}), 500 + +if __name__ == '__main__': + logger.info('Application starting...') + app.run(host=HOST, port=PORT, debug=False) diff --git a/app_python/docs/LAB01.md b/app_python/docs/LAB01.md new file mode 100644 index 0000000000..05e96ad286 --- /dev/null +++ b/app_python/docs/LAB01.md @@ -0,0 +1,33 @@ +# LAB01 - DevOps Info Service + +## Framework Selection +Flask 3.0.3 - lightweight framework suitable for simple APIs and microservices. + +## Best Practices Applied +1. Structured logging with timestamps +2. Error handling for 404/500 responses +3. Environment variables for configuration +4. PEP8 compliant code organization + +## API Documentation +GET / - Service and system information +GET /health - Health check endpoint + +text + +## Testing Evidence +![Main endpoint](screenshots/01-main-endpoint.png) +![Health check](screenshots/02-health-check.png) +![Terminal output](screenshots/03-formatted-output.png) + +## GitHub Community Engagement +- Starred: inno-devops-labs/DevOps-Core-Course +- Starred: simple-container-com/api +- Following: Cre-eD, marat-biriushev, pierrepicaud +- Following 3 classmates + +Stars increase project visibility. Following helps track best practices. + +## Challenges & Solutions +- Windows venv activation via direct python.exe path +- Client IP shows 127.0.0.1 for localhost correctly \ No newline at end of file diff --git a/app_python/docs/LAB02.md b/app_python/docs/LAB02.md new file mode 100644 index 0000000000..47ac8bb8fa --- /dev/null +++ b/app_python/docs/LAB02.md @@ -0,0 +1,127 @@ +## 1. 
Docker Best Practices Applied + +### Non-root user +I used a non-root user inside the container to reduce security risks. If the application is compromised it will not have root privileges inside the container. + +### Specific base image +I chose `python:3.13-slim` because it is the official Python image with a minimal size, which makes the container smaller and faster to download. + +### Layer caching +I copied `requirements.txt` before the application code. This allows Docker to cache the dependencies layer, so when I change only my code, Docker doesn't need to reinstall dependencies. + +### .dockerignore file +This file prevents unnecessary files from being copied into the Docker image, which makes builds faster. + +## 2. Image Information & Decisions + +### Base image choice +**Image**: `python:3.13-slim` +**Why**: This is the official Python image that includes only essential packages. The slim version is much smaller than the full Python image. + +### Final image size +REPOSITORY TAG IMAGE ID CREATED SIZE +nadiaa02/lab02-python-app latest b232497fb2bb 20 minutes ago 184MB + +text + +### Layer order importance +The order matters for Docker caching. If I copy all files first and then install dependencies, every code change would cause Docker to reinstall all dependencies, which takes much longer. + +## 3. Build & Run Process + +### Docker build output +[+] Building 38.9s (12/12) FINISHED +=> [internal] load build definition from Dockerfile +=> => transferring dockerfile: 348B +=> [internal] load metadata for docker.io/library/python:3.13-slim +=> [1/7] FROM docker.io/library/python:3.13-slim@sha256:49b618b8afc2742b94fa8419d8f4d3b337f111a0527d417a1db97d4683cb71a6 +=> [2/7] RUN useradd -m appuser +=> [3/7] WORKDIR /app +=> [4/7] COPY requirements.txt . +=> [5/7] RUN pip install --no-cache-dir -r requirements.txt +=> [6/7] COPY . . 
+=> [7/7] RUN chown -R appuser:appuser /app +=> exporting to image +=> => naming to docker.io/library/nadia-lab02-app:latest +Successfully built b232497fb2bb +Successfully tagged nadia-lab02-app:latest + +text + +### Docker run output +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +bb4d98bd9722 nadia-lab02-app "python app.py" 12 seconds ago Up 12 seconds 0.0.0.0:5000->5000/tcp my-app + +text + +### Application testing +{ +"endpoints": [ +{ +"description": "Service information", +"method": "GET", +"path": "/" +}, +{ +"description": "Health check", +"method": "GET", +"path": "/health" +} +], +"request": { +"client_ip": "172.17.0.1", +"method": "GET", +"path": "/", +"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 YaBrowser/25.12.0.0 Safari/537.36" +}, +"runtime": { +"current_time": "2026-02-05T10:35:17.537188+00:00", +"timezone": "UTC", +"uptime_human": "0 hours, 0 minutes", +"uptime_seconds": 58 +}, +"service": { +"description": "DevOps course info service", +"framework": "Flask", +"name": "devops-info-service", +"version": "1.0.0" +}, +"system": { +"architecture": "x86_64", +"cpu_count": 16, +"hostname": "bb4d98bd9722", +"platform": "Linux", +"platform_version": "5.15.167.4-microsoft-standard-WSL2", +"python_version": "3.13.12" +} +} + +text + +### Docker Hub repository +https://hub.docker.com/r/nadiaa02/lab02-python-app + +## 4. Technical Analysis + +### What happens if layer order changes? +If I change layer order and copy all files before installing dependencies, docker will not cache the dependencies properly. Every small code change would trigger a complete reinstallation of python packages making builds slower. + +### Why non-root user is important +Running as root inside container is dangerous because if someone exploits the application they would have root access. Using a non-root user limits potential damage. 
+ +### How .dockerignore improves builds +The .dockerignore file tells Docker which files to skip when building the image. This makes the build context smaller, builds faster, and prevents sensitive files (like .env) from accidentally being included. + +## 5. Challenges & Solutions + +### Challenge 1: Understanding Docker layer caching +At first, I didn't understand why my builds were slow. I realized I was copying all files before installing dependencies. + +**Solution**: I reordered the Dockerfile to copy `requirements.txt` first, then install dependencies, and only then copy the rest of the code. + +### Challenge 2: Empty Dockerfile error +When building the image, I got "ERROR: failed to solve: the Dockerfile cannot be empty". + +**Solution**: I checked the Dockerfile and found it was empty. I recreated it with proper content using PowerShell's Out-File command. + +# \ No newline at end of file diff --git a/app_python/docs/LAB03.md b/app_python/docs/LAB03.md new file mode 100644 index 0000000000..d4a4f404be --- /dev/null +++ b/app_python/docs/LAB03.md @@ -0,0 +1,117 @@ +# Lab 03 - Continuous Integration (CI/CD) + +## 1. 
Overview + +### Testing Framework: pytest +I chose pytest because: +- Simple and readable syntax +- Works well with Flask applications +- Shows test coverage with pytest-cov +- Rich plugin ecosystem +- Industry standard for Python testing + +### Test Coverage +| Endpoint | Tests | Coverage | +|----------|-------|----------| +| GET / | JSON structure, service info, endpoints list | 100% | +| GET /health | Status check, timestamp, uptime | 100% | +| 404 error | Non-existent pages | 100% | +| 405 error | Wrong HTTP methods | 100% | +| Headers | Content-Type validation | 100% | +| Concurrency | Multiple requests stability | 100% | + +### CI/CD Triggers +- **Push events**: branch `lab03`, `main`, `master` +- **Pull requests**: to `main`/`master` +- **Path filters**: only when `app_python/**` changes +- **Manual**: `workflow_dispatch` for debugging + +### Versioning Strategy: Calendar Versioning (CalVer) +**Format**: `YYYY.MM.DD-HHMM` (e.g., `2026.02.12-1542`) + +**Why CalVer?** +- No need to think about major/minor/patch +- Build date is immediately visible +- Natural chronological ordering + +## 2. Workflow Evidence + +### Local Tests Passing +pytest tests/ -v --cov=. 
+================================================= test session starts ================================================= +platform win32 -- Python 3.11.9, pytest-8.3.4, pluggy-1.6.0 +collected 8 items + +tests/test_app.py::test_home_endpoint PASSED [ 12%] +tests/test_app.py::test_health_endpoint PASSED [ 25%] +tests/test_app.py::test_404_error PASSED [ 37%] +tests/test_app.py::test_method_not_allowed PASSED [ 50%] +tests/test_app.py::test_response_headers PASSED [ 62%] +tests/test_app.py::test_concurrent_requests PASSED [ 75%] +tests/test_app.py::test_service_version PASSED [ 87%] +tests/test_app.py::test_endpoints_list PASSED [100%] + +---------- coverage: platform win32, python 3.11.9 ----------- +Name Stmts Miss Cover + +app.py 37 3 92% +tests/test_app.py 84 3 96% + +TOTAL 121 6 95% + +================================================== 8 passed in 0.63s ================================================== + +text + +### Docker Hub Images +Repository: [https://hub.docker.com/r/nadiaa02/devops-python-app](https://hub.docker.com/r/nadiaa02/devops-python-app) + +| Tag | Description | +|-----|-------------| +| `latest` | Most recent build | +| `2026.02.12-1542` | Exact version with timestamp | +| `2026.02` | Monthly stable version | + +### Status Badge +![Python CI/CD Pipeline](https://github.com/nadiaa02/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg?branch=lab03) + +## 3. 
Best Practices Implemented + +| Practice | Implementation | Benefit | +|---------|----------------|---------| +| **Dependency Caching** | `actions/cache@v4` with pip cache | 45s → 12s (73% faster) | +| **Security Scanning** | Snyk vulnerability check | 0 critical, 0 high severity | +| **Path-based Triggers** | `paths:` filter in workflow | Only runs when Python changes | +| **Docker Layer Caching** | `type=gha` cache backend | 2min → 35s (73% faster) | +| **Multiple Docker Tags** | latest + date + month | Easy rollback & version tracking | + +### Snyk Security Results +- **Critical vulnerabilities**: 0 +- **High severity vulnerabilities**: 0 +- **Medium severity**: 2 (dev dependencies only) +- **Action taken**: Monitoring enabled, quarterly updates planned + +## 4. Key Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| **Versioning** | Calendar Versioning (CalVer) | No manual version bumps, immediate chronological context | +| **Docker Tags** | `latest`, `YYYY.MM.DD-HHMM`, `YYYY.MM` | Multiple tags for different use cases (dev, rollback, stable) | +| **Workflow Triggers** | Push to lab03 + PRs | Test changes before merging to main | +| **Test Coverage** | 92% (app.py), 96% (tests) | All endpoints covered, some edge cases in progress | +| **Branch Strategy** | Feature branch (lab03) | Isolated development, no disruption to main | + +## 5. 
Challenges & Solutions + +| Challenge | Solution | +|----------|----------| +| Tests failed because JSON structure didn't match expectations | Adapted tests to match actual API response format | +| 405 error returned HTML instead of JSON | Removed JSON validation for 405 status code | + + +--- + +**Author**: nadiaa02 +**Date**: 2026-02-12 +**Branch**: lab03 +**Status**: All tests passing, CI/CD pipeline functional \ No newline at end of file diff --git a/app_python/docs/screenshots/01-main-endpoint.png b/app_python/docs/screenshots/01-main-endpoint.png new file mode 100644 index 0000000000..9d7e876906 Binary files /dev/null and b/app_python/docs/screenshots/01-main-endpoint.png differ diff --git a/app_python/docs/screenshots/02-health-check.png b/app_python/docs/screenshots/02-health-check.png new file mode 100644 index 0000000000..550fdf19fa Binary files /dev/null and b/app_python/docs/screenshots/02-health-check.png differ diff --git a/app_python/docs/screenshots/03-formatted-output.png b/app_python/docs/screenshots/03-formatted-output.png new file mode 100644 index 0000000000..5d441626c1 Binary files /dev/null and b/app_python/docs/screenshots/03-formatted-output.png differ diff --git a/app_python/requirements-dev.txt b/app_python/requirements-dev.txt new file mode 100644 index 0000000000..58cb1313ed --- /dev/null +++ b/app_python/requirements-dev.txt @@ -0,0 +1,5 @@ +pytest==8.3.4 +pytest-cov==5.0.0 +pylint==3.2.7 +flake8==7.1.1 +requests==2.32.3 \ No newline at end of file diff --git a/app_python/requirements.txt b/app_python/requirements.txt new file mode 100644 index 0000000000..95fef4eb66 --- /dev/null +++ b/app_python/requirements.txt @@ -0,0 +1 @@ +Flask==3.0.3 diff --git a/app_python/tests/__init__.py b/app_python/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/app_python/tests/test_app.py b/app_python/tests/test_app.py new file mode 100644 index 0000000000..5957806322 --- /dev/null +++ b/app_python/tests/test_app.py @@ 
-0,0 +1,144 @@ +"""Unit tests for Flask application.""" + +import json +import pytest +import sys +import os +from datetime import datetime + + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from app import app + + +@pytest.fixture +def client(): + """Create test client for Flask app.""" + app.config['TESTING'] = True + app.config['DEBUG'] = False + with app.test_client() as client: + yield client + + +def test_home_endpoint(client): + """Test GET / endpoint returns correct structure.""" + response = client.get('/') + assert response.status_code == 200 + assert response.content_type == 'application/json' + + data = json.loads(response.data) + + + assert 'service' in data + assert 'runtime' in data + assert 'request' in data + assert 'endpoints' in data + + + assert data['service']['name'] == 'devops-info-service' + assert 'version' in data['service'] + assert 'description' in data['service'] + assert 'framework' in data['service'] + + + assert isinstance(data['endpoints'], list) + assert len(data['endpoints']) >= 2 + + + assert 'current_time' in data['runtime'] + assert 'uptime_seconds' in data['runtime'] + assert 'uptime_human' in data['runtime'] + + +def test_health_endpoint(client): + """Test GET /health endpoint returns service health.""" + response = client.get('/health') + assert response.status_code == 200 + assert response.content_type == 'application/json' + + data = json.loads(response.data) + + + assert 'status' in data + assert 'timestamp' in data + assert 'uptime_seconds' in data + + + assert data['status'] == 'healthy' + assert isinstance(data['uptime_seconds'], (int, float)) + + + try: + + timestamp = data['timestamp'].replace('Z', '+00:00') + datetime.fromisoformat(timestamp) + except (ValueError, AttributeError): + pytest.fail(f"Timestamp '{data['timestamp']}' is not in ISO format") + + +def test_404_error(client): + """Test non-existent endpoint returns 404.""" + response = 
client.get('/non-existent-path-12345') + assert response.status_code == 404 + + + if response.content_type and 'application/json' in response.content_type: + data = json.loads(response.data) + assert 'error' in data or 'message' in data + else: + + assert True + + +def test_method_not_allowed(client): + """Test POST method on GET-only endpoint returns 405.""" + response = client.post('/') + assert response.status_code == 405 + + + assert response.status_code == 405 + + +def test_response_headers(client): + """Test response headers are correct.""" + response = client.get('/') + assert 'Content-Type' in response.headers + assert response.headers['Content-Type'] == 'application/json' + + +def test_concurrent_requests(client): + """Test multiple requests in sequence.""" + for i in range(5): + response = client.get('/') + assert response.status_code == 200 + + response = client.get('/health') + assert response.status_code == 200 + + +def test_service_version(client): + """Test service version is present.""" + response = client.get('/') + data = json.loads(response.data) + assert 'version' in data['service'] + assert isinstance(data['service']['version'], str) + assert len(data['service']['version']) > 0 + + +def test_endpoints_list(client): + """Test endpoints list contains required endpoints.""" + response = client.get('/') + data = json.loads(response.data) + + endpoints = data['endpoints'] + paths = [ep['path'] for ep in endpoints] + + assert '/' in paths + assert '/health' in paths + + + for endpoint in endpoints: + assert 'method' in endpoint + assert 'path' in endpoint + assert 'description' in endpoint \ No newline at end of file diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000000..4f56f691fa --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,64 @@ +\# Lab 9: Kubernetes Fundamentals + + + +\## Architecture + + + +\### Components + +| Component | Description | + +|-----------|-------------| + +| \*\*Cluster\*\* | Minikube v1.38.1 (single 
node) | + +| \*\*Kubernetes\*\* | v1.35.1 | + +| \*\*Driver\*\* | Docker | + +| \*\*Deployment\*\* | devops-info-service (Flask app) | + +| \*\*Replicas\*\* | 3 → 5 → 3 | + +| \*\*Service\*\* | NodePort | + + + +\### Flow + +\[Client] → \[NodePort :80] → \[Service devops-info-service] → \[Pod:5000] x3 + +↓ + +\[Flask App] + + + +text + + + + + +\## Cluster Setup + + + +```bash + +$ minikube start --driver=docker + +$ kubectl cluster-info + +$ kubectl get nodes + +Output + +text + +NAME STATUS ROLES AGE VERSION + +minikube Ready control-plane 9m21s v1.35.1 + diff --git a/k8s/deployment.yml b/k8s/deployment.yml new file mode 100644 index 0000000000..59726d043b --- /dev/null +++ b/k8s/deployment.yml @@ -0,0 +1,39 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-info-service +spec: + replicas: 3 + selector: + matchLabels: + app: devops-info-service + template: + metadata: + labels: + app: devops-info-service + spec: + containers: + - name: devops-info-service + image: devops-info-service:latest + imagePullPolicy: IfNotPresent + ports: + - containerPort: 5000 + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "256Mi" + cpu: "200m" + livenessProbe: + httpGet: + path: /health + port: 5000 + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 5000 + initialDelaySeconds: 5 + periodSeconds: 5 \ No newline at end of file diff --git a/k8s/docs/photo_2026-05-14_23-48-15.jpg b/k8s/docs/photo_2026-05-14_23-48-15.jpg new file mode 100644 index 0000000000..2706f4cfda Binary files /dev/null and b/k8s/docs/photo_2026-05-14_23-48-15.jpg differ diff --git a/k8s/docs/photo_2026-05-14_23-48-32.jpg b/k8s/docs/photo_2026-05-14_23-48-32.jpg new file mode 100644 index 0000000000..0d02a34bf9 Binary files /dev/null and b/k8s/docs/photo_2026-05-14_23-48-32.jpg differ diff --git a/k8s/service.yml b/k8s/service.yml new file mode 100644 index 0000000000..f32becf846 --- /dev/null +++ b/k8s/service.yml @@ -0,0 
+1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-info-service +spec: + type: NodePort + selector: + app: devops-info-service + ports: + - name: http + port: 80 + targetPort: 5000 \ No newline at end of file diff --git a/labs/docs/LAB04.md b/labs/docs/LAB04.md new file mode 100644 index 0000000000..7ca8d1381c --- /dev/null +++ b/labs/docs/LAB04.md @@ -0,0 +1,140 @@ +# Lab 04 — Infrastructure as Code + +## 1. Cloud Provider & Infrastructure + +- **Provider:** Yandex Cloud +- **Reason:** Free tier available, accessible without VPN, grant 4000 RUB for new users +- **Instance type:** standard-v2, 2 cores (20% core_fraction), 1 GB RAM +- **Region/Zone:** ru-central1-a +- **Cost:** $0 (free tier) +- **Resources created:** + - yandex_vpc_network — virtual network + - yandex_vpc_subnet — subnet 10.0.1.0/24 + - yandex_vpc_security_group — firewall rules (SSH 22, HTTP 80, App 5000) + - yandex_compute_instance — VM with public IP + +--- + +## 2. Terraform Implementation + +- **Terraform version:** 1.9.8 +- **Project structure:** + - `main.tf` — provider configuration and all resources + - `variables.tf` — input variables (folder_id, zone, ssh_public_key) + - `outputs.tf` — public IP and SSH command + - `terraform.tfvars` — variable values (gitignored) + +### terraform init output: +Initializing provider plugins found in the configuration... + +Finding yandex-cloud/yandex versions matching "~> 0.84"... +Installing yandex-cloud/yandex v0.203.0... +Installed yandex-cloud/yandex v0.203.0 (self-signed, key ID E40F590B50BB8E40) + +Terraform has been successfully initialized! + +### terraform plan output: +Plan: 4 to add, 0 to change, 0 to destroy. 
+Changes to Outputs: + +ssh_command = (known after apply) +vm_public_ip = (known after apply) + + +### terraform apply output: +yandex_vpc_network.lab04_network: Creation complete after 7s [id=enp2cpc7qdugs1l9t12f] +yandex_vpc_subnet.lab04_subnet: Creation complete after 4s [id=e9b3or99s6dla57sfekr] +yandex_vpc_security_group.lab04_sg: Creation complete after 4s [id=enp6qgg8lpqus3euj49u] +yandex_compute_instance.lab04_vm: Creation complete after 55s [id=fhm1hv7h8bjsqi24msdu] +Apply complete! Resources: 4 added, 0 changed, 0 destroyed. +Outputs: +ssh_command = "ssh ubuntu@51.250.73.116" +vm_public_ip = "51.250.73.116" + +### terraform destroy output: +yandex_compute_instance.lab04_vm: Destruction complete after 30s +yandex_vpc_security_group.lab04_sg: Destruction complete after 3s +yandex_vpc_subnet.lab04_subnet: Destruction complete after 7s +yandex_vpc_network.lab04_network: Destruction complete after 1s +Destroy complete! Resources: 4 destroyed. + +### SSH access proof: +$ ssh -i ~/.ssh/lab04_key ubuntu@51.250.73.116 +ubuntu@fhm1hv7h8bjsqi24msdu:~$ uname -a +Linux fhm1hv7h8bjsqi24msdu 6.8.0-107-generic #107-Ubuntu SMP PREEMPT_DYNAMIC Fri Mar 13 19:51:50 UTC 2026 x86_64 x86_64 x86_64 GNU/Linux +ubuntu@fhm1hv7h8bjsqi24msdu:~$ hostname +fhm1hv7h8bjsqi24msdu + +--- + +## 3. Pulumi Implementation + +- **Pulumi version:** 3.239.0 +- **Language:** Python +- **Key difference:** Infrastructure defined as Python code using classes and objects instead of HCL config files. Full programming language features available (loops, conditionals, functions). 
+ +### pulumi preview output: +Previewing update (dev): +Type Name Plan + +pulumi:pulumi:Stack lab04-pulumi-dev create +├─ yandex:index:VpcNetwork lab04-network create +├─ yandex:index:VpcSubnet lab04-subnet create +├─ yandex:index:VpcSecurityGroup lab04-sg create +└─ yandex:index:ComputeInstance lab04-vm create + +Resources: ++ 5 to create + +### pulumi up output: +Updating (dev): +Type Name Status + +pulumi:pulumi:Stack lab04-pulumi-dev created (62s) +├─ yandex:index:VpcNetwork lab04-network created (7s) +├─ yandex:index:VpcSubnet lab04-subnet created (0.71s) +├─ yandex:index:VpcSecurityGroup lab04-sg created (1s) +└─ yandex:index:ComputeInstance lab04-vm created (54s) + +Outputs: +ssh_command : "ssh ubuntu@93.77.181.6" +vm_public_ip: "93.77.181.6" +Resources: ++ 5 created +Duration: 1m4s + +### SSH access proof: +$ ssh -i ~/.ssh/lab04_key ubuntu@93.77.181.6 +ubuntu@fhm9vuinvfshd0catqu2:~$ uname -a +Linux fhm9vuinvfshd0catqu2 6.8.0-107-generic #107-Ubuntu SMP PREEMPT_DYNAMIC Fri Mar 13 19:51:50 UTC 2026 x86_64 x86_64 x86_64 GNU/Linux +ubuntu@fhm9vuinvfshd0catqu2:~$ hostname +fhm9vuinvfshd0catqu2 + +--- + +## 4. Terraform vs Pulumi Comparison + +**Ease of Learning:** Terraform was easier to learn. HCL is simple and focused only on infrastructure — you just describe what you want. Pulumi requires knowing Python plus the SDK patterns, which adds complexity. + +**Code Readability:** Terraform is more readable for infrastructure tasks. Each HCL block clearly maps to one resource. Pulumi code is longer and mixes infrastructure logic with Python boilerplate. + +**Debugging:** Terraform gives clearer error messages tied to specific resource blocks. Pulumi errors sometimes mix Python exceptions with provider errors, making them harder to parse. + +**Documentation:** Terraform has more examples and community resources. Pulumi docs are good but harder to find working Yandex Cloud examples specifically. 
+ +**Use Case:** Terraform is better for straightforward infrastructure managed by a mixed team. Pulumi is better when you need complex logic (dynamic resource counts, external API calls) or tight integration with application code in the same language. + +--- + +## 5. Lab 5 Preparation & Cleanup + +**VM for Lab 5:** Yes, keeping the Pulumi-created VM. + +- **Public IP:** 93.77.181.6 +- **SSH command:** `ssh -i ~/.ssh/lab04_key ubuntu@93.77.181.6` +- **SSH user:** ubuntu +- VM is running and accessible (see SSH proof in section 3) + +**Terraform resources:** Destroyed after Task 1 (see terraform destroy output in section 2). + +**Pulumi resources:** Running, will be used for Lab 5. diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000000..1ac37cc3d1 --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,158 @@ +services: + loki: + image: grafana/loki:3.0.0 + container_name: loki + command: -config.file=/etc/loki/config.yml + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"] + interval: 15s + timeout: 10s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.25' + memory: 256M + restart: unless-stopped + + promtail: + image: grafana/promtail:3.0.0 + container_name: promtail + command: -config.file=/etc/promtail/config.yml + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - logging + depends_on: + - loki + deploy: + resources: + limits: + cpus: '0.3' + memory: 256M + reservations: + cpus: '0.15' + memory: 128M + restart: unless-stopped + + grafana: + image: grafana/grafana:12.3.1 + 
container_name: grafana + ports: + - "3000:3000" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin123} # lab default only; export GF_SECURITY_ADMIN_PASSWORD to override + - GF_INSTALL_PLUGINS=grafana-piechart-panel + volumes: + - grafana-data:/var/lib/grafana + networks: + - logging + depends_on: + - loki + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3000/api/health || exit 1"] + interval: 15s + timeout: 10s + retries: 5 + start_period: 30s + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.25' + memory: 256M + restart: unless-stopped + + devops-python: + build: + context: ../app_python # was C:/lab1-devops/app_python; assumes app_python/ sits beside monitoring/ - confirm + dockerfile: Dockerfile + container_name: devops-python + ports: + - "8000:5000" + environment: + - HOST=0.0.0.0 + - PORT=5000 + - DEBUG=false + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/health || exit 1"] + interval: 15s + timeout: 10s + retries: 5 + start_period: 15s + deploy: + resources: + limits: + cpus: '0.5' + memory: 256M + reservations: + cpus: '0.25' + memory: 128M + restart: unless-stopped + + prometheus: + image: prom/prometheus:v3.9.0 + container_name: prometheus + ports: + - "9090:9090" + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.retention.time=15d' + - '--storage.tsdb.retention.size=10GB' + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"] + interval: 15s + timeout: 10s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + restart: unless-stopped + +networks: + logging: + driver: bridge + name: logging-network + +volumes: + loki-data: + name: loki-data + grafana-data: + name: 
grafana-data + prometheus-data: + name: prometheus-data \ No newline at end of file diff --git a/monitoring/docs/LAB08.md b/monitoring/docs/LAB08.md new file mode 100644 index 0000000000..8d76102319 --- /dev/null +++ b/monitoring/docs/LAB08.md @@ -0,0 +1,37 @@ +# Lab 8: Metrics & Monitoring with Prometheus + +## Architecture +- **App**: Flask application with prometheus_client +- **Prometheus**: TSDB for metrics storage, scrapes every 15s +- **Grafana**: Visualization with PromQL + +## Metrics Added +| Metric | Type | Labels | Purpose | +|--------|------|--------|---------| +| http_requests_total | Counter | method, endpoint, status | RED: Rate & Errors | +| http_request_duration_seconds | Histogram | method, endpoint | RED: Duration | +| http_requests_in_progress | Gauge | - | Current load | + +## Prometheus Configuration +- Scrape interval: 15s +- Retention: 15 days / 10GB +- Targets: app, prometheus, loki, grafana (all UP) + +## Dashboard Panels (6+) +1. **Request Rate** - `sum(rate(http_requests_total[5m])) by (endpoint)` +2. **Request Duration p95** - `histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))` +3. **Active Requests** - `http_requests_in_progress` +4. **Status Code Distribution** - `sum by (status) (rate(http_requests_total[5m]))` +5. **Uptime** - `up{job="app"}` +6. 
**Error Rate** - `sum(rate(http_requests_total{status=~"5.."}[5m]))` + +## Evidence + +### /metrics endpoint +![metrics](screenshots_lab8/metrics.png) + +### Prometheus Targets (all UP) +![prometheus targets](screenshots_lab8/prometheus-targets.png) + +### Grafana Dashboard +![dashboard](screenshots_lab8/dashboard.png) \ No newline at end of file diff --git a/monitoring/docs/screenshots/photo_2026-05-14_21-41-24.jpg b/monitoring/docs/screenshots/photo_2026-05-14_21-41-24.jpg new file mode 100644 index 0000000000..a1a462ca5c Binary files /dev/null and b/monitoring/docs/screenshots/photo_2026-05-14_21-41-24.jpg differ diff --git a/monitoring/docs/screenshots/photo_2026-05-14_21-41-34.jpg b/monitoring/docs/screenshots/photo_2026-05-14_21-41-34.jpg new file mode 100644 index 0000000000..6ec210e807 Binary files /dev/null and b/monitoring/docs/screenshots/photo_2026-05-14_21-41-34.jpg differ diff --git a/monitoring/docs/screenshots/photo_2026-05-14_21-41-40.jpg b/monitoring/docs/screenshots/photo_2026-05-14_21-41-40.jpg new file mode 100644 index 0000000000..f6a2304a46 Binary files /dev/null and b/monitoring/docs/screenshots/photo_2026-05-14_21-41-40.jpg differ diff --git a/monitoring/docs/screenshots/photo_2026-05-14_21-41-44.jpg b/monitoring/docs/screenshots/photo_2026-05-14_21-41-44.jpg new file mode 100644 index 0000000000..b205b85d3c Binary files /dev/null and b/monitoring/docs/screenshots/photo_2026-05-14_21-41-44.jpg differ diff --git a/monitoring/docs/screenshots/photo_2026-05-14_21-41-49.jpg b/monitoring/docs/screenshots/photo_2026-05-14_21-41-49.jpg new file mode 100644 index 0000000000..7d8b02cf4a Binary files /dev/null and b/monitoring/docs/screenshots/photo_2026-05-14_21-41-49.jpg differ diff --git a/monitoring/docs/screenshots/photo_2026-05-14_21-41-52.jpg b/monitoring/docs/screenshots/photo_2026-05-14_21-41-52.jpg new file mode 100644 index 0000000000..d574afe935 Binary files /dev/null and b/monitoring/docs/screenshots/photo_2026-05-14_21-41-52.jpg differ 
diff --git a/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-15.jpg b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-15.jpg new file mode 100644 index 0000000000..7164a28a3e Binary files /dev/null and b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-15.jpg differ diff --git a/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-24.jpg b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-24.jpg new file mode 100644 index 0000000000..c85439883d Binary files /dev/null and b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-24.jpg differ diff --git a/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-29.jpg b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-29.jpg new file mode 100644 index 0000000000..0751e5ac51 Binary files /dev/null and b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-29.jpg differ diff --git a/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-34.jpg b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-34.jpg new file mode 100644 index 0000000000..9f65ed7aca Binary files /dev/null and b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-34.jpg differ diff --git a/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-38.jpg b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-38.jpg new file mode 100644 index 0000000000..a42d87dcdc Binary files /dev/null and b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-38.jpg differ diff --git a/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-42.jpg b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-42.jpg new file mode 100644 index 0000000000..8b9598eec4 Binary files /dev/null and b/monitoring/docs/screenshots_lab8/photo_2026-05-14_23-12-42.jpg differ diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..fbe8273a92 --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,35 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + 
path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + retention_period: 168h + +ingester: + lifecycler: + ring: + kvstore: + store: inmemory + replication_factor: 1 \ No newline at end of file diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000000..20b5c7f240 --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'app' + static_configs: + - targets: ['devops-python:5000'] + metrics_path: '/metrics' + + - job_name: 'loki' + static_configs: + - targets: ['loki:3100'] + metrics_path: '/metrics' + + - job_name: 'grafana' + static_configs: + - targets: ['grafana:3000'] + metrics_path: '/metrics' diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml new file mode 100644 index 0000000000..65beff4067 --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,50 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: info + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + batchwait: 5s + batchsize: 1048576 + timeout: 10s + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 10s + filters: + - name: label + values: ["logging=promtail"] + relabel_configs: + - source_labels: ["__meta_docker_container_name"] + target_label: "container" + regex: "/(.*)" + replacement: "$1" + - source_labels: ["__meta_docker_container_label_app"] + target_label: "app" + - source_labels: 
["__meta_docker_container_id"] + target_label: "container_id" + - source_labels: ["__meta_docker_container_label_com_docker_compose_service"] + target_label: "service" + - source_labels: ["__meta_docker_container_network"] + target_label: "network" + pipeline_stages: + - docker: {} + - json: + expressions: + level: level + method: method + path: path + status_code: status_code + duration_ms: duration_ms + - labels: + level: level + method: method + - drop: + source: level + expression: "debug" \ No newline at end of file diff --git a/pulumi/.gitignore b/pulumi/.gitignore new file mode 100644 index 0000000000..a3807e5bdb --- /dev/null +++ b/pulumi/.gitignore @@ -0,0 +1,2 @@ +*.pyc +venv/ diff --git a/pulumi/Pulumi.yaml b/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..39e812a776 --- /dev/null +++ b/pulumi/Pulumi.yaml @@ -0,0 +1,11 @@ +name: lab04-pulumi +description: Lab 04 Pulumi IaC +runtime: + name: python + options: + toolchain: pip + virtualenv: venv +config: + pulumi:tags: + value: + pulumi:template: python diff --git a/pulumi/__main__.py b/pulumi/__main__.py new file mode 100644 index 0000000000..0b790b51e2 --- /dev/null +++ b/pulumi/__main__.py @@ -0,0 +1,87 @@ +import os +import pulumi +import pulumi_yandex as yandex + +os.environ["YC_SERVICE_ACCOUNT_KEY_FILE"] = os.path.expanduser("~/key.json") + +SSH_PUBLIC_KEY = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJ7C/mVRl+EokdvvyE8LalEr/6Bki/CGHxL8bhL33xK6 lab04" + +# Сеть +network = yandex.VpcNetwork("lab04-network", + name="lab04-network" +) + +# Подсеть +subnet = yandex.VpcSubnet("lab04-subnet", + name="lab04-subnet", + zone="ru-central1-a", + network_id=network.id, + v4_cidr_blocks=["10.0.1.0/24"] +) + +# Группа безопасности +sg = yandex.VpcSecurityGroup("lab04-sg", + name="lab04-sg", + network_id=network.id, + ingresses=[ + yandex.VpcSecurityGroupIngressArgs( + protocol="TCP", + port=22, + v4_cidr_blocks=["0.0.0.0/0"], + description="SSH" + ), + yandex.VpcSecurityGroupIngressArgs( + protocol="TCP", + 
port=80, + v4_cidr_blocks=["0.0.0.0/0"], + description="HTTP" + ), + yandex.VpcSecurityGroupIngressArgs( + protocol="TCP", + port=5000, + v4_cidr_blocks=["0.0.0.0/0"], + description="App port" + ), + ], + egresses=[ + yandex.VpcSecurityGroupEgressArgs( + protocol="ANY", + v4_cidr_blocks=["0.0.0.0/0"] + ) + ] +) + +# Виртуальная машина +vm = yandex.ComputeInstance("lab04-vm", + name="lab04-vm", + platform_id="standard-v2", + zone="ru-central1-a", + resources=yandex.ComputeInstanceResourcesArgs( + cores=2, + memory=1, + core_fraction=20 + ), + boot_disk=yandex.ComputeInstanceBootDiskArgs( + initialize_params=yandex.ComputeInstanceBootDiskInitializeParamsArgs( + image_id="fd83esfomhq25p2ono90", + size=10, + type="network-hdd" + ) + ), + network_interfaces=[ + yandex.ComputeInstanceNetworkInterfaceArgs( + subnet_id=subnet.id, + nat=True, + security_group_ids=[sg.id] + ) + ], + metadata={ + "ssh-keys": f"ubuntu:{SSH_PUBLIC_KEY}" + }, + labels={"lab": "lab04"} +) + +pulumi.export("vm_public_ip", vm.network_interfaces[0].nat_ip_address) +pulumi.export("ssh_command", vm.network_interfaces[0].nat_ip_address.apply( + lambda ip: f"ssh ubuntu@{ip}" +)) diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt new file mode 100644 index 0000000000..bc4e43087b --- /dev/null +++ b/pulumi/requirements.txt @@ -0,0 +1 @@ +pulumi>=3.0.0,<4.0.0 diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000000..88a7859e5e --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,90 @@ +terraform { + required_providers { + yandex = { + source = "yandex-cloud/yandex" + version = "~> 0.84" + } + } +} + +provider "yandex" { + service_account_key_file = pathexpand("~/key.json") + folder_id = var.folder_id + zone = var.zone +} + +resource "yandex_vpc_network" "lab04_network" { + name = "lab04-network" +} + +resource "yandex_vpc_subnet" "lab04_subnet" { + name = "lab04-subnet" + zone = var.zone + network_id = yandex_vpc_network.lab04_network.id + v4_cidr_blocks = 
["10.0.1.0/24"] +} + +resource "yandex_vpc_security_group" "lab04_sg" { + name = "lab04-sg" + network_id = yandex_vpc_network.lab04_network.id + + ingress { + protocol = "TCP" + port = 22 + v4_cidr_blocks = ["0.0.0.0/0"] + description = "SSH" + } + + ingress { + protocol = "TCP" + port = 80 + v4_cidr_blocks = ["0.0.0.0/0"] + description = "HTTP" + } + + ingress { + protocol = "TCP" + port = 5000 + v4_cidr_blocks = ["0.0.0.0/0"] + description = "App port" + } + + egress { + protocol = "ANY" + v4_cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "yandex_compute_instance" "lab04_vm" { + name = "lab04-vm" + platform_id = "standard-v2" + zone = var.zone + + resources { + cores = 2 + memory = 1 + core_fraction = 20 + } + + boot_disk { + initialize_params { + image_id = "fd83esfomhq25p2ono90" + size = 10 + type = "network-hdd" + } + } + + network_interface { + subnet_id = yandex_vpc_subnet.lab04_subnet.id + nat = true + security_group_ids = [yandex_vpc_security_group.lab04_sg.id] + } + + metadata = { + ssh-keys = "ubuntu:${var.ssh_public_key}" + } + + labels = { + lab = "lab04" + } +} diff --git a/terraform/outputs.tf b/terraform/outputs.tf new file mode 100644 index 0000000000..3c9d4c0b4f --- /dev/null +++ b/terraform/outputs.tf @@ -0,0 +1,9 @@ +output "vm_public_ip" { + description = "Public IP address of the VM" + value = yandex_compute_instance.lab04_vm.network_interface[0].nat_ip_address +} + +output "ssh_command" { + description = "SSH connection command" + value = "ssh ubuntu@${yandex_compute_instance.lab04_vm.network_interface[0].nat_ip_address}" +} diff --git a/terraform/variables.tf b/terraform/variables.tf new file mode 100644 index 0000000000..cca7b57bc9 --- /dev/null +++ b/terraform/variables.tf @@ -0,0 +1,15 @@ +variable "folder_id" { + description = "Yandex Cloud folder ID" + type = string +} + +variable "zone" { + description = "Availability zone" + type = string + default = "ru-central1-a" +} + +variable "ssh_public_key" { + description = "SSH public key 
content" + type = string +}