diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml new file mode 100644 index 0000000000..8ecdab4ba6 --- /dev/null +++ b/.github/workflows/ansible-deploy.yml @@ -0,0 +1,74 @@ +name: Ansible Deployment + +on: + push: + branches: [ main, master ] + paths: + - 'ansible/**' + - '.github/workflows/ansible-deploy.yml' + pull_request: + branches: [ main, master ] + paths: + - 'ansible/**' + +jobs: +# lint: +# name: Ansible Lint +# runs-on: ubuntu-latest + +# steps: +# - name: Checkout code +# uses: actions/checkout@v4 + +# - name: Set up Python +# uses: actions/setup-python@v5 +# with: +# python-version: '3.12' + +# - name: Install dependencies +# run: | +# pip install ansible ansible-lint + +# - name: Create Vault password file +# run: | +# echo "${{ secrets.VAULT_PASS }}" > ansible/.vault_pass +# chmod 600 ansible/.vault_pass + +# - name: Run ansible-lint +# run: | +# cd ansible +# ansible-lint playbooks/*.yml + + deploy: + name: Deploy Application + # needs: lint + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Ansible and dependencies + run: | + pip install ansible + ansible-galaxy collection install community.docker + + - name: Create Vault password file + run: | + echo "${{ secrets.VAULT_PASS }}" > ansible/.vault_pass + chmod 600 ansible/.vault_pass + + - name: Run Ansible playbook + run: | + cd ansible + ansible-playbook playbooks/provision.yml + ansible-playbook playbooks/deploy.yml + + - name: Verify deployment + run: | + docker ps \ No newline at end of file diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000000..3593b60c10 --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,47 @@ +name: Test Python App + +on: + push: + paths: + - 'app_python/**' + - '.github/**' + +jobs: + test: + name: Test/Lint 
app + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: 3.14 + - name: Install dependencies + run: pip install -r app_python/requirements-dev.txt + - name: Run linter + run: flake8 app_python/app.py app_python/tests/ + - name: Run pytest + run: pytest . + push: + name: Push docker image to Dockerhub + runs-on: ubuntu-latest + needs: test + steps: + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - name: Get current date for versioning + uses: Kaven-Universe/github-action-current-date-time@v1 + id: current-time + with: + format: "YYYY.MM.DD" + - name: Build and push docker image + uses: docker/build-push-action@v5 + with: + context: "{{defaultContext}}:app_python/" + push: true + tags: ${{ secrets.DOCKERHUB_USERNAME }}/devops_app:latest, ${{ secrets.DOCKERHUB_USERNAME }}/devops_app:${{ steps.current-time.outputs.time }} + + + \ No newline at end of file diff --git a/.gitignore b/.gitignore index 30d74d2584..30ad708c19 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -test \ No newline at end of file +test +ansible/.vault_pass +ansible/.secrets/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..77eabe7946 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "ansible.python.interpreterPath": "/usr/bin/python" +} \ No newline at end of file diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000000..44142226bc --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,12 @@ +[defaults] +inventory = inventory/hosts.ini +roles_path = roles +host_key_checking = False +remote_user = ubuntu +retry_files_enabled = False +vault_password_file = .vault_pass + +[privilege_escalation] +become = True +become_method = sudo +become_user = root \ No newline at end of file diff --git 
a/ansible/docs/LAB05.md b/ansible/docs/LAB05.md new file mode 100644 index 0000000000..5beffee6df --- /dev/null +++ b/ansible/docs/LAB05.md @@ -0,0 +1,208 @@ +# LAB 05 — Ansible Fundamentals Documentation + +## 1. Architecture Overview + +- **Ansible version (control node):** `ansible [core 2.20.2]` +- **Target VM:** `blazz1t-vm` (`95.31.23.50`) from Lab 4 +- **Target OS:** Ubuntu VM (Lab requirement: Ubuntu 22.04/24.04) +- **Execution model:** role-based playbooks (`provision.yml`, `deploy.yml`) with reusable roles in `roles/` + +### Project Structure (implemented) + +```text +ansible/ +├── ansible.cfg +├── inventory/ +│ ├── hosts.ini +│ └── group_vars/ +│ └── all.yml # vaulted secrets for inventory scope +├── group_vars/ +│ └── all.yml # vaulted secrets +├── playbooks/ +│ ├── provision.yml +│ └── deploy.yml +├── roles/ +│ ├── common/ +│ │ ├── defaults/main.yml +│ │ └── tasks/main.yml +│ ├── docker/ +│ │ ├── defaults/main.yml +│ │ ├── tasks/main.yml +│ │ └── handlers/main.yml +│ └── web_app/ +│ ├── defaults/main.yml +│ ├── tasks/main.yml +│ └── handlers/main.yml +└── docs/ + └── LAB05.md +``` + +### Why roles instead of monolithic playbooks? + +Roles separate provisioning and deployment concerns, reduce duplication, and make each component reusable across environments. This also keeps playbooks thin and readable (`roles:` only), while task logic and defaults stay in dedicated role directories. + +--- + +## 2. Roles Documentation + +### Role: `common` + +- **Purpose:** baseline OS preparation. +- **Tasks:** + - update apt cache (`cache_valid_time: 3600`) + - install essential packages (`python3-pip`, `curl`, `git`, `vim`, `htop`) +- **Variables (defaults):** `common_packages` +- **Handlers:** none +- **Dependencies:** none + +### Role: `docker` + +- **Purpose:** install and configure Docker runtime on Ubuntu. +- **Tasks:** + 1. install apt prerequisites + 2. create `/etc/apt/keyrings` + 3. add Docker GPG key + 4. add Docker apt repository + 5. 
install Docker packages (`docker-ce`, `docker-ce-cli`, `containerd.io`, `docker-buildx-plugin`, `docker-compose-plugin`) + 6. install `python3-docker` + 7. ensure Docker service is enabled and started + 8. add target user to `docker` group +- **Variables (defaults):** + - `docker_apt_prerequisites` + - `docker_packages` + - `docker_python_package` + - `docker_manage_user` + - `docker_arch_map`, `docker_arch` +- **Handlers:** `restart docker` +- **Dependencies:** none (used after `common` in `provision.yml`) + +### Role: `web_app` + +- **Purpose:** authenticate to Docker Hub and deploy/update Python app container. +- **Tasks:** + 1. Docker Hub login (`community.docker.docker_login`, `no_log: true`) + 2. pull image (`community.docker.docker_image`, `source: pull`) + 3. stop old container (if running) + 4. remove old container + 5. run new container (`5000:5000`, restart policy, env) + 6. wait for app port + 7. verify `/health` endpoint via `uri` +- **Variables (defaults):** + - `app_name` + - `docker_image` + - `docker_image_tag` + - `app_port` + - `app_container_name` + - `app_restart_policy` + - `app_environment` +- **Handlers:** `restart application container` +- **Dependencies:** Docker runtime from `docker` role + +--- + +## 3. Idempotency Demonstration + +### First run (`playbooks/provision.yml`) + +Expected and observed behavior: tasks create/modify system state, so many tasks are marked `changed`. + +![Provision first run](./provision_yml_first.png) + +### Second run (`playbooks/provision.yml`) + +Expected and observed behavior: no configuration drift, tasks converge to desired state and are mostly/all `ok`. + +![Provision second run](./provision_yml_second.png) + +### Analysis + +- First run changed state because packages/repos/service/group membership had to be configured. +- Second run did not reapply changes due to idempotent modules (`apt`, `service`, `user`, `file`, `apt_repository`) with desired state declarations. 
+- This allows safe repeated execution and recovery from partial failures. + +--- + +## 4. Ansible Vault Usage + +Sensitive data is stored in vaulted files and decrypted automatically via `ansible.cfg`: + +```ini +[defaults] +vault_password_file = .vault_pass +``` + +Vaulted variables include: + +- SSH/become credentials (`ansible_password`, `ansible_become_password`) +- Docker Hub credentials (`dockerhub_username`, `dockerhub_password`) +- Deployment variables (`app_name`, `docker_image`, `docker_image_tag`, `app_port`, `app_container_name`) + +Example encrypted file header (`inventory/group_vars/all.yml`): + +```yaml +$ANSIBLE_VAULT;1.1;AES256 +39353632363062313937356432663237316164663962313739316134626164613631373039353332 +6538613039343263396261363233303263343136666163620a336431343664613032636161623861 +``` + +Vault password strategy: + +- keep `.vault_pass` local only +- file permissions `600` +- never commit `.vault_pass` or unencrypted secrets +- commit only vaulted files + +--- + +## 5. Deployment Verification + +### Deploy playbook execution + +Deployment run evidence: + +![Deploy playbook run](./deployment.png) + +### Container status and endpoint checks + +Evidence for running container and successful endpoint checks: + +![Container status and endpoints](./container_status_and_endpoints.png) + +Checks performed: + +- `ansible webservers -a "docker ps"` +- `curl http://<VM_IP>:5000/health` +- `curl http://<VM_IP>:5000/` + +--- + +## 6. Key Decisions + +### Why use roles instead of plain playbooks? + +Roles keep automation modular and maintainable by isolating responsibility (base setup, Docker setup, app deployment). This improves readability and allows independent updates without rewriting large playbooks. + +### How do roles improve reusability? + +Each role can be reused in other projects or other hosts by changing only variables. The same role logic can be applied to staging/production with different inventories or vaulted values. 
+ +### What makes a task idempotent? + +Idempotent tasks declare desired state (`present`, `started`, `absent`) rather than imperative shell commands. Re-running them does not create extra side effects when the target is already compliant. + +### How do handlers improve efficiency? + +Handlers run only when notified by a changed task, so services are not restarted on every run. This reduces unnecessary restarts and speeds up stable deployments. + +### Why is Ansible Vault necessary? + +Vault protects secrets while keeping infrastructure code in version control. It enables secure collaboration without exposing credentials in plaintext files or shell history. + +--- + +## 7. Challenges + +- SSH authentication initially failed until connection variables were correctly loaded from vaulted inventory-scoped group vars. +- YAML indentation issues (tabs) caused parsing errors; fixed by normalizing files to spaces. +- Connectivity variability (permission denied/timeouts) required separate validation of credentials vs. network reachability. 
+ diff --git a/ansible/docs/available_tags.png b/ansible/docs/available_tags.png new file mode 100644 index 0000000000..79faf38081 Binary files /dev/null and b/ansible/docs/available_tags.png differ diff --git a/ansible/docs/clean_reinstallation.png b/ansible/docs/clean_reinstallation.png new file mode 100644 index 0000000000..2b5e22f051 Binary files /dev/null and b/ansible/docs/clean_reinstallation.png differ diff --git a/ansible/docs/container_status_and_endpoints.png b/ansible/docs/container_status_and_endpoints.png new file mode 100644 index 0000000000..7457fc34d5 Binary files /dev/null and b/ansible/docs/container_status_and_endpoints.png differ diff --git a/ansible/docs/deployment.png b/ansible/docs/deployment.png new file mode 100644 index 0000000000..3b24829484 Binary files /dev/null and b/ansible/docs/deployment.png differ diff --git a/ansible/docs/docker.png b/ansible/docs/docker.png new file mode 100644 index 0000000000..65bc25398a Binary files /dev/null and b/ansible/docs/docker.png differ diff --git a/ansible/docs/docker_compose_container.png b/ansible/docs/docker_compose_container.png new file mode 100644 index 0000000000..f6e5bd5b7a Binary files /dev/null and b/ansible/docs/docker_compose_container.png differ diff --git a/ansible/docs/normal_deployment.png b/ansible/docs/normal_deployment.png new file mode 100644 index 0000000000..c8df056f5f Binary files /dev/null and b/ansible/docs/normal_deployment.png differ diff --git a/ansible/docs/only_packages.png b/ansible/docs/only_packages.png new file mode 100644 index 0000000000..6622d66f6a Binary files /dev/null and b/ansible/docs/only_packages.png differ diff --git a/ansible/docs/provision_yml_first.png b/ansible/docs/provision_yml_first.png new file mode 100644 index 0000000000..e9aa476ae6 Binary files /dev/null and b/ansible/docs/provision_yml_first.png differ diff --git a/ansible/docs/provision_yml_second.png b/ansible/docs/provision_yml_second.png new file mode 100644 index 0000000000..478655ead4 
Binary files /dev/null and b/ansible/docs/provision_yml_second.png differ diff --git a/ansible/docs/scenario_4a.png b/ansible/docs/scenario_4a.png new file mode 100644 index 0000000000..0cd8a80b9e Binary files /dev/null and b/ansible/docs/scenario_4a.png differ diff --git a/ansible/docs/scenario_4b.png b/ansible/docs/scenario_4b.png new file mode 100644 index 0000000000..78887cccca Binary files /dev/null and b/ansible/docs/scenario_4b.png differ diff --git a/ansible/docs/second_run_no_changed.png b/ansible/docs/second_run_no_changed.png new file mode 100644 index 0000000000..dc13ad6ea1 Binary files /dev/null and b/ansible/docs/second_run_no_changed.png differ diff --git a/ansible/docs/wipe_only.png b/ansible/docs/wipe_only.png new file mode 100644 index 0000000000..efd8a345e8 Binary files /dev/null and b/ansible/docs/wipe_only.png differ diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml new file mode 100644 index 0000000000..480b8b0256 --- /dev/null +++ b/ansible/group_vars/all.yml @@ -0,0 +1,20 @@ +$ANSIBLE_VAULT;1.1;AES256 +37346562636264353031653839366236623831656163656330636466303938343531613961616565 +3262363866393863313833303836303035613236323036610a373162316431316131306165636638 +34366339356236303634663231306439333366633934643765313434336131623235316162376262 +3233336366393461390a656438346162376162343030303363303239656138663664626634343236 +61363730376165303234653865356330316361663436316263396461363765313230343332643063 +61386332303166383736363065316565353331393039636530643634663766383237383361356263 +63383433386439316134346539356663653932353536343537323530306239633265303766313132 +39663862616262666538313839663133336431306630323365616461373033373039396437336363 +64333966613637663362313836323838346435323564356637636463303435356236336437393236 +38343737643862323433646639356139633839333537306334373032336434623663643930323332 +31363861623362633832646363653534393334646664363237663432333334323364336463613765 
+33306237633139633162643634643661386239633837653934633435636131383136306266623066 +33383835393464376536306663323134643730623133623464323132373866373261613866643338 +39333461663363396662313562346336323833383063626235313232303637366638323531383964 +31353162623839646430396630376335393462303930663832303165643162326532346137363162 +65613338363034363962323963663935633461663165306266393437633666313431623236633532 +64663431613366316431626531653262653762303931333462386365373661313033336461353439 +30663564303563346538636363353739646438386463666438643363663538623161643739343833 +303463383233636632633662386531386363 diff --git a/ansible/inventory/group_vars/all.yml b/ansible/inventory/group_vars/all.yml new file mode 100644 index 0000000000..2eff5427b2 --- /dev/null +++ b/ansible/inventory/group_vars/all.yml @@ -0,0 +1,20 @@ +$ANSIBLE_VAULT;1.1;AES256 +39353632363062313937356432663237316164663962313739316134626164613631373039353332 +6538613039343263396261363233303263343136666163620a336431343664613032636161623861 +61303039663638333535623637323563626433666461313365396136323639393930353266383566 +3765316334323164330a623330313961626638383966393065336330316632666431386466356635 +64633430346362623732396366666666633265373861663064616564333937663562373339633234 +38363964643966626330646565646332343863373461386532626630333038663865373534343165 +65366461343362633933643830373165343762393237636536343066333634316430653939396536 +63346263646130303833316431663264333435626463303863616562643835366262396338646636 +36333130316233333536633861386533363737323033393063366335663732346139633665636234 +34326663616433333162623638366634313530343564396434636464663562633563316430303833 +31616236316364383166373135666339633034643762373766663236306366663865343233356234 +32356135323363623435626464616466366266643761376363333933376332616632613463373836 +37613237653433663764383964373835663733393031626539656164386461613465623433346561 +65626361323838386263353163336232653164333436656137616138323630366133666334373039 
+34646165666266393432303936613934313737393233626639643066633536366165623338656161 +36643966373366636330393630303065323139643464666634303838356331643636333934656261 +62643232353066346237306137656336656566633633616537303862353266336334646635663365 +38323834353961346130626134633433303463623963383964666330323465303330383061663761 +376437376334303265646166623661663935 diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..0d22862530 --- /dev/null +++ b/ansible/inventory/hosts.ini @@ -0,0 +1,2 @@ +[webservers] +blazz1t-vm ansible_host=95.31.23.50 ansible_user=blazz1t \ No newline at end of file diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..95174b9e0e --- /dev/null +++ b/ansible/playbooks/deploy.yml @@ -0,0 +1,7 @@ +--- +- name: Deploy application + hosts: webservers + become: true + + roles: + - web_app diff --git a/ansible/playbooks/provision.yml b/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..7cc2e6678d --- /dev/null +++ b/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/ansible/roles/common/defaults/main.yml b/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..2282a109a1 --- /dev/null +++ b/ansible/roles/common/defaults/main.yml @@ -0,0 +1,7 @@ +--- +common_packages: + - python3-pip + - curl + - git + - vim + - htop diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..ca2a52777c --- /dev/null +++ b/ansible/roles/common/tasks/main.yml @@ -0,0 +1,55 @@ +--- +- name: Common role tasks + block: + + - name: Package management + block: + + - name: Update apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + + - name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages 
}}" + state: present + + rescue: + + - name: Attempt to fix apt cache and retry + ansible.builtin.command: apt-get update --fix-missing + changed_when: false + + always: + + - name: Log package block completion + ansible.builtin.file: + path: /tmp/ansible_packages_completed + state: touch + + tags: + - packages + + - name: User management + block: + + - name: Ensure default user exists + ansible.builtin.user: + name: deploy + state: present + create_home: true + + always: + + - name: Log user block completion + ansible.builtin.file: + path: /tmp/ansible_users_completed + state: touch + + tags: + - users + + become: true + tags: + - common \ No newline at end of file diff --git a/ansible/roles/docker/defaults/main.yml b/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..9c769a4dbd --- /dev/null +++ b/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,21 @@ +--- +docker_apt_prerequisites: + - ca-certificates + - curl + +docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + +docker_python_package: python3-docker +docker_manage_user: "{{ ansible_user }}" + +docker_arch_map: + x86_64: amd64 + aarch64: arm64 + armv7l: armhf + +docker_arch: "{{ docker_arch_map.get(ansible_architecture, ansible_architecture) }}" diff --git a/ansible/roles/docker/handlers/main.yml b/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..1a5058da5e --- /dev/null +++ b/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..4891d37509 --- /dev/null +++ b/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,87 @@ +--- +- name: Docker role tasks + block: + + - name: Docker installation + block: + + - name: Install Docker apt prerequisites + ansible.builtin.apt: + name: "{{ 
docker_apt_prerequisites }}" + state: present + update_cache: true + cache_valid_time: 3600 + + - name: Ensure apt keyrings directory exists + ansible.builtin.file: + path: /etc/apt/keyrings + state: directory + mode: "0755" + + - name: Add Docker GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + + - name: Add Docker apt repository + ansible.builtin.apt_repository: + repo: "deb [arch={{ docker_arch }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + filename: docker + state: present + notify: restart docker + + - name: Install Docker packages + ansible.builtin.apt: + name: "{{ docker_packages }}" + state: present + update_cache: true + notify: restart docker + + - name: Install Docker Python package for Ansible modules + ansible.builtin.apt: + name: "{{ docker_python_package }}" + state: present + + rescue: + + - name: Wait before retrying Docker GPG setup + ansible.builtin.pause: + seconds: 10 + + - name: Retry apt cache update + ansible.builtin.apt: + update_cache: true + + - name: Retry adding Docker GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + + tags: + - docker_install + + - name: Docker configuration + block: + + - name: Add user to docker group + ansible.builtin.user: + name: "{{ docker_manage_user }}" + groups: docker + append: true + + tags: + - docker_config + + always: + + - name: Ensure Docker service is enabled and running + ansible.builtin.service: + name: docker + state: started + enabled: true + + become: true + tags: + - docker \ No newline at end of file diff --git a/ansible/roles/web_app/defaults/main.yml b/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 0000000000..908127d3b6 --- /dev/null +++ b/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,19 @@ +--- +app_name: 
devops-app + +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_tag: latest + +web_app_wipe: false + +app_port: 5000 +app_internal_port: 5000 + +compose_project_dir: "/opt/{{ app_name }}" +docker_compose_version: "3.8" + +app_container_name: "{{ app_name }}" + +app_restart_policy: unless-stopped + +app_environment: {} \ No newline at end of file diff --git a/ansible/roles/web_app/handlers/main.yml b/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..058264942f --- /dev/null +++ b/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: restart application container + community.docker.docker_container: + name: "{{ app_container_name }}" + state: started + restart: true diff --git a/ansible/roles/web_app/meta/main.yml b/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..fc95875336 --- /dev/null +++ b/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: docker \ No newline at end of file diff --git a/ansible/roles/web_app/tasks/main.yml b/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..3e60f262f2 --- /dev/null +++ b/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,37 @@ +--- +- name: Include wipe tasks + ansible.builtin.include_tasks: wipe.yml + tags: + - web_app_wipe + +- name: Deploy application with Docker Compose + block: + + - name: Create application directory + ansible.builtin.file: + path: "/opt/{{ app_name }}" + state: directory + mode: "0755" + + - name: Template docker-compose file + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "/opt/{{ app_name }}/docker-compose.yml" + mode: "0644" + + - name: Deploy application using Docker Compose + community.docker.docker_compose_v2: + project_src: "/opt/{{ app_name }}" + pull: always + state: present + recreate: auto + + rescue: + + - name: Log deployment failure + ansible.builtin.debug: + msg: "Application deployment failed for {{ app_name }}" + + tags: + - app_deploy 
+ - compose \ No newline at end of file diff --git a/ansible/roles/web_app/tasks/wipe.yml b/ansible/roles/web_app/tasks/wipe.yml new file mode 100644 index 0000000000..eacea6c768 --- /dev/null +++ b/ansible/roles/web_app/tasks/wipe.yml @@ -0,0 +1,29 @@ +--- +- name: Wipe web application + block: + + - name: Stop and remove containers + community.docker.docker_compose_v2: + project_src: "/opt/{{ app_name }}" + state: absent + ignore_errors: true + + - name: Remove docker-compose file + ansible.builtin.file: + path: "/opt/{{ app_name }}/docker-compose.yml" + state: absent + ignore_errors: true + + - name: Remove application directory + ansible.builtin.file: + path: "/opt/{{ app_name }}" + state: absent + ignore_errors: true + + - name: Log wipe completion + ansible.builtin.debug: + msg: "Application {{ app_name }} wiped successfully" + + when: web_app_wipe | bool + tags: + - web_app_wipe \ No newline at end of file diff --git a/ansible/roles/web_app/templates/docker-compose.yml.j2 b/ansible/roles/web_app/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..84b8fb3cda --- /dev/null +++ b/ansible/roles/web_app/templates/docker-compose.yml.j2 @@ -0,0 +1,27 @@ +version: '3.8' + +services: + {{ app_name }}: + image: {{ docker_image }}:{{ docker_tag }} + container_name: {{ app_name }} + + ports: + - "{{ app_port }}:{{ app_internal_port }}" + + environment: + {% if app_environment | length > 0 %} + {% for key, value in app_environment.items() %} + {{ key }}: "{{ value }}" + {% endfor %} + {% else %} + {} + {% endif %} + + restart: {{ app_restart_policy }} + + networks: + - {{ app_name }}_network + +networks: + {{ app_name }}_network: + driver: bridge \ No newline at end of file diff --git a/app_python/.dockerignore b/app_python/.dockerignore new file mode 100644 index 0000000000..9671f26b9e --- /dev/null +++ b/app_python/.dockerignore @@ -0,0 +1,5 @@ +.venv/ +venv/ +README.md +*/__pycache__/* +.docs/ \ No newline at end of file diff --git 
a/app_python/.gitignore b/app_python/.gitignore new file mode 100644 index 0000000000..957340efeb --- /dev/null +++ b/app_python/.gitignore @@ -0,0 +1,13 @@ +.venv +# Python +__pycache__/ +.pytest_cache +*.py[cod] +*.log + +# IDE +.vscode/ +.idea/ + +# OS +.DS_Store \ No newline at end of file diff --git a/app_python/Dockerfile b/app_python/Dockerfile new file mode 100644 index 0000000000..2b8f91de53 --- /dev/null +++ b/app_python/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.13-slim +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY . . +EXPOSE 5000 +ENTRYPOINT ["uvicorn", "app:app", "--host", "0.0.0.0"] +CMD ["--port", "5000"] \ No newline at end of file diff --git a/app_python/README.md b/app_python/README.md new file mode 100644 index 0000000000..26f1bf1551 --- /dev/null +++ b/app_python/README.md @@ -0,0 +1,38 @@ +[![Test Python App](https://github.com/BlazZ1t/devops-core-course-blazz1t/actions/workflows/python-ci.yml/badge.svg)](https://github.com/BlazZ1t/devops-core-course-blazz1t/actions/workflows/python-ci.yml) + +### Overview + +This is a service that sends basic information about the server it runs on, about the app itself, and can do a health check (so it doesn't have to go to a hospital). + +### Prerequisites + +To run this project you need Python 3.11+ installed. + +### Installation + +`python -m venv .venv` +For Linux: `source .venv/bin/activate` +For Windows: `.venv/Scripts/Activate.ps1` +`pip install -r requirements.txt` + +### Running the application + +`uvicorn app:app` + +For custom config there are flags `--host` and `--port` for setting the host and the port respectively (duh) + +### API Endpoints + +`GET /` - Service and system information +`GET /health` - Health check + +### Configuration + +SERVICE_NAME - sets service name +VERSION - sets service version + +### Docker + +* To build a Docker image, run the `docker build [-t image_name] .` command. 
"""DevOps info service.

A small FastAPI application that reports service, system and runtime
information, exposes a health endpoint and publishes Prometheus metrics.
"""
from fastapi import FastAPI, Request
import logging
from pythonjsonlogger import jsonlogger
import os
import socket
import platform
import time
import datetime
from contextlib import asynccontextmanager
from prometheus_client import Counter, Histogram
from prometheus_client import Gauge, generate_latest, CONTENT_TYPE_LATEST
from starlette.responses import Response

SERVICE_NAME = os.getenv("SERVICE_NAME", "devops-info-service")
VERSION = os.getenv("VERSION", "1.0.0")
# Import-time placeholder; lifespan() overwrites it when the app starts.
START_TIME_UTC = round(time.time())

# These three values are only reported in the startup log record below;
# nothing in this module binds a socket with them.
HOST = os.getenv('HOST', '0.0.0.0')
PORT = int(os.getenv('PORT', 5000))
DEBUG = os.getenv('DEBUG', 'False').lower() == 'true'

# Root logger with a single stream handler that renders records as JSON.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

logHandler = logging.StreamHandler()
formatter = jsonlogger.JsonFormatter(
    "%(asctime)s %(levelname)s %(message)s %(name)s"
)

logHandler.setFormatter(formatter)
logger.addHandler(logHandler)


# --- HTTP metrics (updated by metrics_middleware) ---
http_requests_total = Counter(
    'http_requests_total',
    'Total HTTP requests',
    ['method', 'endpoint', 'status_code']
)

http_request_duration_seconds = Histogram(
    'http_request_duration_seconds',
    'HTTP request duration',
    ['method', 'endpoint']
)

http_requests_in_progress = Gauge(
    'http_requests_in_progress',
    'HTTP requests currently being processed'
)

# --- App-specific metrics ---
# NOTE(review): both metrics below are registered (and therefore exported
# by /metrics) but nothing in this module updates them yet.
endpoint_calls = Counter(
    'devops_info_endpoint_calls',
    'Endpoint calls',
    ['endpoint']
)

system_info_duration = Histogram(
    'devops_info_system_collection_seconds',
    'System info collection time'
)


def normalize_path(request: Request):
    """Return the label used for the request's endpoint.

    Prefers the path template of the matched route stored in
    ``request.scope["route"]``; falls back to the raw URL path when no
    route object (or no ``path`` attribute) is available.
    """
    route = request.scope.get("route")
    if route and hasattr(route, "path"):
        return route.path
    return request.url.path


def _uptime_seconds():
    """Whole seconds elapsed since START_TIME_UTC was last set."""
    return round(time.time()) - START_TIME_UTC


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Record the start time and log startup/shutdown of the application."""
    global START_TIME_UTC

    START_TIME_UTC = round(time.time())

    logger.info(
        "Application startup",
        extra={
            "service": SERVICE_NAME,
            "version": VERSION,
            "host": HOST,
            "port": PORT,
            "debug": DEBUG
        }
    )

    yield

    logger.info("Application shutdown")

# Interactive docs and the OpenAPI schema are switched off on purpose.
app = FastAPI(
    docs_url=None,
    redoc_url=None,
    openapi_url=None,
    lifespan=lifespan
)


@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    """Record request count, duration and in-flight gauge for every request.

    A request that raises is counted with status code 500 and the
    exception is re-raised unchanged.
    """
    # perf_counter is monotonic, so the measured duration cannot go
    # negative if the wall clock is adjusted mid-request.
    started = time.perf_counter()
    method = request.method
    # Assume failure until the downstream app has produced a response.
    status_code = 500

    http_requests_in_progress.inc()

    try:
        response = await call_next(request)
        status_code = response.status_code
        return response
    finally:
        # The matched route is only placed in the scope while the request
        # is being routed, i.e. during call_next, so the label has to be
        # resolved afterwards. Unmatched paths still fall back to the raw
        # URL path.
        endpoint = normalize_path(request)

        http_requests_total.labels(
            method=method,
            endpoint=endpoint,
            status_code=status_code
        ).inc()

        http_request_duration_seconds.labels(
            method=method,
            endpoint=endpoint
        ).observe(time.perf_counter() - started)

        # In `finally` so the gauge is released on every exit path,
        # including cancellation, which is not an `Exception` subclass.
        http_requests_in_progress.dec()


def get_system_info():
    """Return hostname, OS name, CPU architecture and Python version."""
    return {
        "hostname": socket.gethostname(),
        "platform": platform.system(),
        "architecture": platform.machine(),
        "python_version": platform.python_version()
    }


def get_runtime_info():
    """Return uptime (numeric and human-readable), local time and timezone.

    ``timezone`` is ``time.timezone``: the local non-DST offset in seconds
    west of UTC.
    """
    logger.debug("Startup seconds are %s", START_TIME_UTC)
    # Sample the clock once so both uptime fields describe the same instant.
    uptime = _uptime_seconds()
    return {
        "uptime_seconds": uptime,
        "uptime_human": str(datetime.timedelta(seconds=uptime)),
        "current_time": time.strftime("%H hours %M minutes", time.localtime()),
        "timezone": time.timezone
    }


def get_endpoints():
    """List the registered routes as ``{path, method, description}`` dicts.

    Routes that expose no path or no HTTP methods are skipped. When a route
    accepts several methods the alphabetically first one is reported, so
    the output is stable between calls.
    """
    result = []
    for route in app.routes:
        path = getattr(route, "path", None)
        methods = getattr(route, "methods", None)
        if path is None or not methods:
            continue
        result.append(
            {
                "path": path,
                "method": sorted(methods)[0],
                "description": ""
            }
        )

    return result


@app.get("/")
def get_server_info(request: Request):
    """Return service, system, runtime, request and endpoint information."""
    # request.client is None when the server supplies no peer address.
    client = request.client
    return {
        "service": {
            "name": SERVICE_NAME,
            "version": VERSION,
            "description": "DevOps course info service",
            "framework": "FastAPI"
        },
        "system": get_system_info(),
        "runtime": get_runtime_info(),
        "request": {
            "client_ip": client.host if client else None,
            "user_agent": request.headers.get("user-Agent"),
            "method": request.method,
            "path": request.url.path
        },
        "endpoints": get_endpoints()
    }


@app.get("/health")
def get_health():
    """Health probe: static status, local ISO timestamp and uptime."""
    return {
        "status": "healthy",
        "timestamp": datetime.datetime.now().isoformat(),
        "uptime_seconds": _uptime_seconds()
    }


@app.get("/metrics")
def metrics():
    """Expose all registered metrics in the Prometheus text format."""
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
+ +**Reasons for choosing FastAPI:** +- High performance (built on Starlette and Pydantic) +- Async support out of the box +- Simple and readable code +- Automatic request handling and validation +- Ideal for microservices and DevOps tooling + +### Comparison With Alternatives + +| Framework | Language | Performance | Async Support | Use Case | +|---------|---------|-------------|---------------|----------| +| **FastAPI** | Python | ⭐⭐⭐⭐ | Yes (native) | Modern APIs, microservices | +| Flask | Python | ⭐⭐ | Limited | Small/simple APIs | +| Django REST | Python | ⭐⭐ | Partial | Large monolithic apps | +| Express.js | JavaScript | ⭐⭐⭐ | Yes | Node.js ecosystems | + +**Conclusion:** +FastAPI provides the best balance between performance, simplicity, and modern API development for this project. + +--- + +## 2. Best Practices Applied + +### 2.1 Environment-Based Configuration + +Configuration values such as service name, version, host, and port are read from environment variables. + + +SERVICE_NAME = os.getenv("SERVICE_NAME", "devops-info-service") +VERSION = os.getenv("VERSION", "1.0.0") +PORT = int(os.getenv('PORT', 5000)) +DEBUG = os.getenv('DEBUG', 'False').lower() == 'true' \ No newline at end of file diff --git a/app_python/docs/LAB02.md b/app_python/docs/LAB02.md new file mode 100644 index 0000000000..7df38bf521 --- /dev/null +++ b/app_python/docs/LAB02.md @@ -0,0 +1,205 @@ +## Docker Best Practices applied + +* .dockerignore file was added to not include .venv, __pycache\_\_ and other irrelevant files into a docker image. 
+ +* Docker is being run as a non-root user, to prevent it from being abused heavily in case of breach + +* Requirements installed before copying the code makes it more deterministic and allows docker to cache dependencies when needed + +* No package cache is bloating an image, with usage of `--no-cache-dir` flag in pip install so it is lightweight + +## Image information & Decisions + +* `python:3.13-slim` is used due to it being the latest allowed image and it being really lightweight + +* Final image weight is 58 megabytes which is reasonable for a simple FastAPI app + +* Layer structure is the following: + + * Base OS + Python runtime + * Python dependencies + * Application source code + * Runtime command + +* Optimization choices + + * Slim image + * No `pip` cache + * .dockerignore used + * Explicit port exposure + +* Build & Run process + + Here's the command used for build `docker build -t devops_app .` and here's the full output: + + `Sending build context to Docker daemon 158.2kB +Step 1/6 : FROM python:3.13-slim + ---> 2b9c9803c6a2 +Step 2/6 : COPY requirements.txt . 
+ ---> e008ec81cade +Step 3/6 : RUN pip install --no-cache-dir -r requirements.txt + ---> Running in 8a9075a6d920 +Collecting fastapi==0.115.0 (from -r requirements.txt (line 1)) + Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB) +Collecting uvicorn==0.32.0 (from uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading uvicorn-0.32.0-py3-none-any.whl.metadata (6.6 kB) +Collecting starlette<0.39.0,>=0.37.2 (from fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading starlette-0.38.6-py3-none-any.whl.metadata (6.0 kB) +Collecting pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 (from fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading pydantic-2.12.5-py3-none-any.whl.metadata (90 kB) +Collecting typing-extensions>=4.8.0 (from fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB) +Collecting click>=7.0 (from uvicorn==0.32.0->uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading click-8.3.1-py3-none-any.whl.metadata (2.6 kB) +Collecting h11>=0.8 (from uvicorn==0.32.0->uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading h11-0.16.0-py3-none-any.whl.metadata (8.3 kB) +Collecting httptools>=0.5.0 (from uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (3.5 kB) +Collecting python-dotenv>=0.13 (from uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB) +Collecting pyyaml>=5.1 (from uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (2.4 kB) +Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading 
uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (4.9 kB) +Collecting watchfiles>=0.13 (from uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB) +Collecting websockets>=10.4 (from uvicorn[standard]==0.32.0->-r requirements.txt (line 2)) + Downloading websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (6.8 kB) +Collecting annotated-types>=0.6.0 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB) +Collecting pydantic-core==2.41.5 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB) +Collecting typing-inspection>=0.4.2 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading typing_inspection-0.4.2-py3-none-any.whl.metadata (2.6 kB) +Collecting anyio<5,>=3.4.0 (from starlette<0.39.0,>=0.37.2->fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading anyio-4.12.1-py3-none-any.whl.metadata (4.3 kB) +Collecting idna>=2.8 (from anyio<5,>=3.4.0->starlette<0.39.0,>=0.37.2->fastapi==0.115.0->-r requirements.txt (line 1)) + Downloading idna-3.11-py3-none-any.whl.metadata (8.4 kB) +Downloading fastapi-0.115.0-py3-none-any.whl (94 kB) +Downloading uvicorn-0.32.0-py3-none-any.whl (63 kB) +Downloading pydantic-2.12.5-py3-none-any.whl (463 kB) +Downloading pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 253.1 kB/s 0:00:08 +Downloading starlette-0.38.6-py3-none-any.whl (71 kB) +Downloading 
anyio-4.12.1-py3-none-any.whl (113 kB) +Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB) +Downloading click-8.3.1-py3-none-any.whl (108 kB) +Downloading h11-0.16.0-py3-none-any.whl (37 kB) +Downloading httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (478 kB) +Downloading idna-3.11-py3-none-any.whl (71 kB) +Downloading python_dotenv-1.2.1-py3-none-any.whl (21 kB) +Downloading pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (801 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 801.6/801.6 kB 411.7 kB/s 0:00:01 +Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB) +Downloading typing_inspection-0.4.2-py3-none-any.whl (14 kB) +Downloading uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (4.4 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/4.4 MB 480.5 kB/s 0:00:08 +Downloading watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (456 kB) +Downloading websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (184 kB) +Installing collected packages: websockets, uvloop, typing-extensions, pyyaml, python-dotenv, idna, httptools, h11, click, annotated-types, uvicorn, typing-inspection, pydantic-core, anyio, watchfiles, starlette, pydantic, fastapi + +Successfully installed annotated-types-0.7.0 anyio-4.12.1 click-8.3.1 fastapi-0.115.0 h11-0.16.0 httptools-0.7.1 idna-3.11 pydantic-2.12.5 pydantic-core-2.41.5 python-dotenv-1.2.1 pyyaml-6.0.3 starlette-0.38.6 typing-extensions-4.15.0 typing-inspection-0.4.2 uvicorn-0.32.0 uvloop-0.22.1 watchfiles-1.1.1 websockets-16.0 + +[notice] A new release of pip is available: 25.3 -> 26.0 +[notice] To update, run: pip install --upgrade pip + ---> Removed intermediate container 8a9075a6d920 + ---> 114df2b2de81 +Step 4/6 : COPY . . 
+ ---> 0235449a3ed7 +Step 5/6 : EXPOSE 8000 + ---> Running in 866694e6fd4e + ---> Removed intermediate container 866694e6fd4e + ---> 24194f119c31 +Step 6/6 : CMD ["uvicorn", "app:app", "--host", "0.0.0.0"] + ---> Running in 38ac2fe4d38c + ---> Removed intermediate container 38ac2fe4d38c + ---> be8b407f827b +Successfully built be8b407f827b +Successfully tagged devops_app:latest``` + +* Here are logs from inside the docker: + +`   docker logs unruffled_jemison +INFO: Started server process [1] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) +recording start time +1770234098 +Startup seconds are 1770234098 +INFO: 172.17.0.1:33966 - "GET / HTTP/1.1" 200 OK +INFO: 172.17.0.1:41494 - "GET /health HTTP/1.1" 200 OK` + +DockerHub URL: -- https://hub.docker.com/repository/docker/blazz1t/devops_app/general + +And logs of successful push: + +`docker push blazz1t/devops_app:1.0.0 +The push refers to repository [docker.io/blazz1t/devops_app] +25f1391e6119: Pushed +b3639af23419: Mounted from library/python +3290dd2b7743: Pushed +0c8d55a45c0d: Mounted from library/python +38e1e046d3d0: Pushed +8a3ca8cbd12d: Mounted from library/python +0da4a108bcf2: Mounted from library/python +1.0.0: digest: sha256:be8b407f827b177fcba0e6462c5b0ae3890fe39335c1f97e711d07bd934ebfce size: 1845` + + +## Why This Dockerfile Works + +* Uses a compatible Python runtime + +* Installs dependencies before copying app code + +* Runs uvicorn directly as the container process + +* Exposes the correct application port + +### If layer order was changed + +* If COPY . . 
was placed before installing dependencies: + +* Any code change would invalidate the dependency layer + +* Slower rebuilds + +* Less efficient CI pipelines + +### Security Considerations + +* Minimal base image + +* No build-time secrets stored + +* .dockerignore prevents accidental leaks + +* Non-root user recommended + +## Challenges & Solutions +Issue: Large Image Size + +Cause: Python dependencies and base image overhead +Solution: Use slim image and remove pip cache + +Issue: Rebuilding Took Too Long + +Cause: No effective layer caching +Solution: Reorder Dockerfile layers in future iterations + +Issue: Security Concerns + +Cause: Default root user +Solution: Introduce non-root user + +## What I Learned + +Dockerfile layer order directly affects build performance + +Small base images matter a lot + +.dockerignore is not optional + +Security defaults are rarely safe + +Docker images should be designed, not just made \ No newline at end of file diff --git a/app_python/docs/LAB03.md b/app_python/docs/LAB03.md new file mode 100644 index 0000000000..1b8e624a9c --- /dev/null +++ b/app_python/docs/LAB03.md @@ -0,0 +1,19 @@ +## Testing + +- Pytest was chosen for its speed and ease of use +- Tests are structured as one file per endpoint, in the tests/ folder +- To run tests locally, first run `pip install -r app_python/requirements-dev.txt` and then run `pytest .` + +![Alt text](screenshots/04-test-output.png) + +## GitHub Actions Pipeline +- Pipeline triggers on push when files under app_python/ or .github/ change, so unrelated changes do not start it +- The basic actions for checking out code and setting up Python are self-explanatory, as are the Docker actions; they are official and the best fit for the job.
The action for getting the current date is used to set up versioning +- I tag versions with the current date +- https://github.com/BlazZ1t/devops-core-course-blazz1t/actions/runs/21946301431 +- ![](screenshots/05-successful-pipeline-run.png) + +### Optimizations implemented +- Docker push job requires tests and linter to pass +- Dependencies are cached using setup-python action +- DockerHub credentials are stored as GitHub Actions secrets \ No newline at end of file diff --git a/app_python/docs/screenshots/01-main-endpoint.png b/app_python/docs/screenshots/01-main-endpoint.png new file mode 100644 index 0000000000..43b9657946 Binary files /dev/null and b/app_python/docs/screenshots/01-main-endpoint.png differ diff --git a/app_python/docs/screenshots/02-health-check.png b/app_python/docs/screenshots/02-health-check.png new file mode 100644 index 0000000000..c8c21de73d Binary files /dev/null and b/app_python/docs/screenshots/02-health-check.png differ diff --git a/app_python/docs/screenshots/03-formatted-output.png b/app_python/docs/screenshots/03-formatted-output.png new file mode 100644 index 0000000000..a696e0311a Binary files /dev/null and b/app_python/docs/screenshots/03-formatted-output.png differ diff --git a/app_python/docs/screenshots/04-test-output.png b/app_python/docs/screenshots/04-test-output.png new file mode 100644 index 0000000000..114f467cf9 Binary files /dev/null and b/app_python/docs/screenshots/04-test-output.png differ diff --git a/app_python/docs/screenshots/05-successful-pipeline-run.png b/app_python/docs/screenshots/05-successful-pipeline-run.png new file mode 100644 index 0000000000..bb178fa235 Binary files /dev/null and b/app_python/docs/screenshots/05-successful-pipeline-run.png differ diff --git a/app_python/docs/screenshots/output.jpg b/app_python/docs/screenshots/output.jpg new file mode 100644 index 0000000000..87eeecfb1a Binary files /dev/null and b/app_python/docs/screenshots/output.jpg differ diff --git a/app_python/requirements-dev.txt
def test_root_endpoint_structure():
    """GET / answers 200 and carries every expected section and field."""
    reply = client.get("/")

    assert reply.status_code == 200

    payload = reply.json()

    # Top-level sections
    for section in ("service", "system", "runtime", "request", "endpoints"):
        assert section in payload

    # Service section
    service_block = payload["service"]
    for field in ("name", "version", "description", "framework"):
        assert field in service_block
    assert service_block["framework"] == "FastAPI"

    # System section
    system_block = payload["system"]
    for field in ("hostname", "platform", "architecture", "python_version"):
        assert field in system_block

    # Runtime section
    runtime_block = payload["runtime"]
    for field in ("uptime_seconds", "uptime_human", "current_time",
                  "timezone"):
        assert field in runtime_block
    assert isinstance(runtime_block["uptime_seconds"], int)
    assert runtime_block["uptime_seconds"] >= 0

    # Request section
    request_block = payload["request"]
    for field in ("client_ip", "user_agent", "method", "path"):
        assert field in request_block
    assert request_block["method"] == "GET"
    assert request_block["path"] == "/"

    # Endpoints must be a non-empty list
    endpoint_list = payload["endpoints"]
    assert isinstance(endpoint_list, list)
    assert len(endpoint_list) >= 1
Architecture Overview + +## Deployment Architecture + +The application is deployed into a Minikube Kubernetes cluster using the following architecture: + +```text + +----------------------+ + | NodePort | + | devops-app-service | + | Port: 80 | + +----------+-----------+ + | + v + +------------+------------+ + | Deployment | + | devops-app | + | Replicas: 3 | + +------------+------------+ + | + ------------------------------------------------- + | | | + v v v ++---------------+ +---------------+ +---------------+ +| Pod #1 | | Pod #2 | | Pod #3 | +| FastAPI App | | FastAPI App | | FastAPI App | +| Port: 8000 | | Port: 8000 | | Port: 8000 | ++---------------+ +---------------+ +---------------+ +``` + +## Networking Flow + +1. External traffic enters through the Kubernetes NodePort Service. +2. The Service routes traffic to healthy Pods using label selectors. +3. The Deployment ensures the desired number of replicas remain running. +4. Liveness and readiness probes continuously verify application health. + +## Resource Allocation Strategy + +The application uses conservative resource requests and limits suitable for local development and lightweight production workloads: + +| Resource | Request | Limit | +| -------- | ------- | ----- | +| CPU | 100m | 500m | +| Memory | 128Mi | 256Mi | + +### Rationale + +* **Requests** guarantee minimum resources for scheduling. +* **Limits** prevent Pods from consuming excessive cluster resources. +* The values are appropriate for a lightweight FastAPI API service. + +--- + +# 2. Kubernetes Setup + +## Why Minikube? + +Minikube was selected because it provides a simple and complete local Kubernetes environment with built-in support for: + +* Kubernetes control plane components +* NodePort services +* Ingress addons +* Easy local development and debugging +* Quick startup and teardown + +Minikube is ideal for learning Kubernetes concepts locally before deploying workloads to cloud-managed clusters. 
+ +## Cluster Verification Commands + +```bash +kubectl cluster-info +kubectl get nodes +kubectl get namespaces +``` + +## Example Cluster Output + +```bash +$ kubectl cluster-info +Kubernetes control plane is running at https://127.0.0.1:6443 +CoreDNS is running at https://127.0.0.1:6443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy + +$ kubectl get nodes +NAME STATUS ROLES AGE VERSION +minikube Ready control-plane 1d v1.33.x +``` + +--- + +# 3. Manifest Files + +## Deployment Manifest — `k8s/deployment.yml` + +The Deployment manifest defines the desired application state and manages Pods automatically. + +### Key Features + +* 3 application replicas +* Rolling update strategy +* Resource requests and limits +* Liveness and readiness probes +* Label-based Pod selection +* Declarative deployment management + +## Deployment Manifest + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-app + labels: + app: devops-app +spec: + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: devops-app + template: + metadata: + labels: + app: devops-app + spec: + containers: + - name: devops-app + image: blazz1t/devops_app:2026.04.20 + args: ["--port", "8000"] + ports: + - containerPort: 8000 + + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "500m" + memory: "256Mi" + + livenessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 +``` + +## Service Manifest — `k8s/service.yml` + +The Service exposes the Deployment externally using a NodePort. 
+ +### Key Features + +* NodePort service type +* Stable networking endpoint +* Load balancing across Pods +* Label selector targeting application Pods + +## Service Manifest + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: devops-app-service + labels: + app: devops-app +spec: + type: NodePort + selector: + app: devops-app + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 8000 + nodePort: 30007 +``` + +--- + +# 4. Application Description + +The deployed application is a FastAPI-based DevOps information service. + +## Application Features + +* REST API built with FastAPI +* JSON structured logging +* Health endpoint for Kubernetes probes +* Prometheus metrics endpoint +* Runtime and system information reporting +* Request monitoring and metrics collection + +## Available Endpoints + +| Endpoint | Description | +| ---------- | --------------------------- | +| `/` | Main application endpoint | +| `/health` | Health check endpoint | +| `/metrics` | Prometheus metrics endpoint | + +## Health Checks + +Both liveness and readiness probes use the `/health` endpoint. + +### Why This Endpoint Was Chosen + +The `/health` endpoint: + +* Returns lightweight JSON responses +* Verifies application responsiveness +* Allows Kubernetes to restart unhealthy containers +* Prevents traffic routing to unavailable Pods + +Example response: + +```json +{ + "status": "healthy", + "timestamp": "2026-05-18T22:10:00", + "uptime_seconds": 120 +} +``` + +--- + +# 5. 
Deployment Operations + +## Applying Kubernetes Resources + +```bash +kubectl apply -f k8s/deployment.yaml +kubectl apply -f k8s/service.yml +``` + +## Verifying Resources + +```bash +kubectl get deployments +kubectl get pods +kubectl get services +kubectl get all +``` + +## Deployment Description + +```bash +kubectl describe deployment devops-app +``` + +## Accessing the Service + +```bash +minikube service devops-app-service +``` + +Alternatively: + +```bash +kubectl port-forward service/devops-app-service 8080:80 +``` + +The application becomes accessible through the browser or curl. + +Example: + +```bash +curl http://127.0.0.1:8080/health +``` + +--- + +# 6. Scaling Demonstration + +## Scaling to 5 Replicas + +The Deployment was scaled from 3 to 5 replicas. + +### Command Used + +```bash +kubectl scale deployment/devops-app --replicas=5 +``` + +## Verifying Scaling + +```bash +kubectl get pods +kubectl rollout status deployment/devops-app +``` + +Kubernetes automatically scheduled and created additional Pods to match the desired state. + +--- + +# 7. Rolling Updates and Rollbacks + +## Rolling Update Strategy + +The Deployment uses a rolling update strategy configured with: + +```yaml +strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 +``` + +### Benefits + +* Zero downtime deployments +* Gradual replacement of old Pods +* Continuous application availability +* Safe deployment process + +## Performing a Rolling Update + +An updated image version or configuration can be applied using: + +```bash +kubectl apply -f k8s/deployment.yaml +``` + +## Monitoring Rollout Status + +```bash +kubectl rollout status deployment/devops-app +kubectl rollout history deployment/devops-app +``` + +## Rollback Demonstration + +Rollback capability was verified using: + +```bash +kubectl rollout undo deployment/devops-app +``` + +Kubernetes restored the previous ReplicaSet automatically. + +--- + +# 8.
Deployment Evidence + +## kubectl get all + +![kubectl get all](screenshots_9/kubectl_get_all.png) + +## kubectl get pods,svc + +![kubectl get pods services](screenshots_9/kubectl_get_pods_services.png) + +## Deployment Description + +![deployment description](screenshots_9/deployment_description.png) + +## Scaling Demonstration + +![scaling output](screenshots_9/scaling_output.png) + +## Rolling Update Output + +![rollout update](screenshots_9/rollout_update_output.png) + +--- + +# 9. Production Considerations + +## Resource Limits + +Resource requests and limits help: + +* Prevent resource starvation +* Improve cluster stability +* Enable efficient scheduling +* Avoid noisy neighbor problems + +## Health Monitoring + +Liveness and readiness probes improve reliability by: + +* Restarting failed containers automatically +* Preventing traffic from reaching unhealthy Pods +* Supporting rolling updates safely + +## Future Improvements + +If deploying this workload to production, the following improvements would be implemented: + +### Security + +* Use Kubernetes Secrets for sensitive values +* Enable Pod Security Standards +* Add network policies +* Use image signing and vulnerability scanning + +### Observability + +* Deploy Prometheus and Grafana +* Add centralized logging with Loki or ELK +* Configure alerting rules +* Add distributed tracing + +### Scalability + +* Horizontal Pod Autoscaler (HPA) +* Cluster autoscaling +* Multi-node cluster deployment + +### Reliability + +* Ingress controller with TLS +* Canary deployments +* Multi-environment GitOps workflow +* Persistent monitoring dashboards + +--- + +# 10. Challenges and Solutions + +## Challenges Encountered + +### 1. Probe Configuration + +Initially, readiness probes failed because the application startup timing was too aggressive. 
+ +### Solution + +Adjusted: + +* `initialDelaySeconds` +* `periodSeconds` +* probe timeout values + +This allowed the application enough time to initialize before health checks began. + +--- + +### 2. Service Accessibility + +Accessing the application externally required understanding the relationship between: + +* Service ports +* Container ports +* NodePort mappings + +### Solution + +Used: + +```bash +minikube service devops-app-service +``` + +to expose the application correctly. + +--- + +### 3. Understanding Declarative Kubernetes + +One major learning outcome was understanding how Kubernetes continuously reconciles actual cluster state with the desired declarative state. + +This reinforced concepts such as: + +* Controllers +* Desired state management +* ReplicaSets +* Self-healing infrastructure +* Rolling deployments + +--- + +# 11. Key Learnings + +Through this lab, the following Kubernetes concepts were learned and practiced: + +* Kubernetes architecture fundamentals +* Declarative resource management +* Deployments and ReplicaSets +* Services and networking +* Resource requests and limits +* Health checks and probes +* Rolling updates and rollbacks +* Scaling applications +* Cluster debugging with kubectl + +The lab demonstrated how Kubernetes automates orchestration tasks that would otherwise require manual operational management. + +--- + +# 12. 
Useful Commands Reference + +## Cluster Commands + +```bash +kubectl cluster-info +kubectl get nodes +kubectl get namespaces +``` + +## Deployment Commands + +```bash +kubectl apply -f k8s/deployment.yaml +kubectl get deployments +kubectl describe deployment devops-app +``` + +## Pod Commands + +```bash +kubectl get pods +kubectl logs POD_NAME +kubectl describe pod POD_NAME +``` + +## Service Commands + +```bash +kubectl get services +kubectl describe service devops-app-service +``` + +## Scaling and Rollouts + +```bash +kubectl scale deployment/devops-app --replicas=5 +kubectl rollout status deployment/devops-app +kubectl rollout history deployment/devops-app +kubectl rollout undo deployment/devops-app +``` + +--- + +# Conclusion + +This lab successfully demonstrated the deployment and management of a production-oriented FastAPI application on Kubernetes using Minikube. + +The implementation included: + +* Declarative manifests +* Rolling updates +* Resource management +* Health checks +* NodePort networking +* Scaling operations +* Rollback capabilities + +The project provided practical experience with Kubernetes fundamentals and established a strong foundation for future topics such as Helm, GitOps, observability, and advanced traffic management. 
---
# Deployment of the FastAPI info service: three replicas, rolled out one
# Pod at a time.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: devops-app
  labels:
    app: devops-app
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Bring up one extra Pod first and never remove a ready one early,
      # so serving capacity does not drop below `replicas` during a rollout.
      maxSurge: 1
      maxUnavailable: 0
  # Must match the Pod template labels below.
  selector:
    matchLabels:
      app: devops-app
  template:
    metadata:
      labels:
        app: devops-app
    spec:
      containers:
        - name: devops-app
          image: blazz1t/devops_app:2026.04.20
          # NOTE(review): `args` replaces the image's CMD rather than
          # appending to it, so this presumably relies on the image
          # defining an ENTRYPOINT - confirm against the Dockerfile.
          args:
            - "--port"
            - "8000"
          ports:
            - containerPort: 8000

          resources:
            requests:
              cpu: "100m"
              memory: "128Mi"
            limits:
              cpu: "500m"
              memory: "256Mi"

          # Restart the container after three consecutive failed checks.
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 2
            failureThreshold: 3

          # Keep the Pod out of Service endpoints while checks fail.
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 2
            failureThreshold: 3
a/k8s/screenshots_9/scaling_output.png b/k8s/screenshots_9/scaling_output.png new file mode 100644 index 0000000000..71bb04ea69 Binary files /dev/null and b/k8s/screenshots_9/scaling_output.png differ diff --git a/k8s/service.yml b/k8s/service.yml new file mode 100644 index 0000000000..3d61cba987 --- /dev/null +++ b/k8s/service.yml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-app-service + labels: + app: devops-app +spec: + type: NodePort + selector: + app: devops-app + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 8000 + nodePort: 30007 \ No newline at end of file diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000000..0b241ed072 --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,139 @@ +version: "3.8" + +services: + loki: + image: grafana/loki:3.0.0 + container_name: loki + ports: + - "3100:3100" + command: -config.file=/etc/loki/config.yml + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - monitoring + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + + promtail: + image: grafana/promtail:3.0.0 + container_name: promtail + user: root + command: -config.file=/etc/promtail/config.yml + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + - loki + networks: + - monitoring + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + + grafana: + image: grafana/grafana:12.3.1 + container_name: grafana + ports: + - "3000:3000" + volumes: + - grafana-data:/var/lib/grafana + depends_on: + - loki + networks: + - monitoring + healthcheck: + test: 
["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + + app-python: + image: blazz1t/devops_app:latest + container_name: devops-python-app + ports: + - "5000:5000" + networks: + - monitoring + labels: + logging: "promtail" + app: "devops-python" + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + + prometheus: + image: prom/prometheus:v3.9.0 + container_name: prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - '--storage.tsdb.retention.time=15d' + - '--storage.tsdb.retention.size=10GB' + ports: + - "9090:9090" + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - monitoring + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + +volumes: + loki-data: + grafana-data: + prometheus-data: + +networks: + monitoring: + driver: bridge \ No newline at end of file diff --git a/monitoring/docs/LAB07.md b/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..a9cb1e6fd3 --- /dev/null +++ b/monitoring/docs/LAB07.md @@ -0,0 +1,459 @@ +# Lab 07 --- Centralized Logging with Loki, Promtail, and Grafana + +## Architecture + +The system implements centralized logging using **Grafana Loki**, +**Promtail**, and **Grafana** alongside a Python FastAPI application. 
+ +**Components:** + +- **Python Application** --- Generates structured JSON logs. +- **Promtail** --- Discovers Docker containers and ships logs to Loki. +- **Loki** --- Stores and indexes logs. +- **Grafana** --- Visualizes logs using LogQL queries. + +**Flow:** + +1. Application writes logs to **stdout** in JSON format. +2. Docker stores logs in container log streams. +3. **Promtail** discovers containers via the Docker API and reads logs. +4. Promtail pushes logs to **Loki**. +5. **Grafana** queries Loki and displays logs on dashboards. + +**Diagram (conceptual):** + + +-------------------+ + | Python App | + | (JSON logging) | + +--------+----------+ + | + v + +-------------------+ + | Docker Logs | + +--------+----------+ + | + v + +-------------------+ + | Promtail | + | Docker Discovery | + +--------+----------+ + | + v + +-------------------+ + | Loki | + | Log Storage | + +--------+----------+ + | + v + +-------------------+ + | Grafana | + | Dashboards & UI | + +-------------------+ + +------------------------------------------------------------------------ + +# Setup Guide + +## 1. Clone repository + +``` bash +git clone <repository-url> +cd <repository>/monitoring +``` + +## 2. Start monitoring stack + +``` bash +docker compose up -d +``` + +## 3. Verify containers + +``` bash +docker ps +``` + +Expected containers: + +- loki +- promtail +- grafana +- devops-python-app + +## 4. Access Grafana + +Open: + + http://localhost:3000 + +Login page screenshot: + +![Grafana Login](grafana_login_page.png) + +## 5. Add Loki datasource + +Navigate: + + Connections → Data Sources → Loki + +Set URL: + + http://loki:3100 + +Screenshot: + +![Loki Datasource](grafana_loki.png) + +------------------------------------------------------------------------ + +# Configuration + +## Promtail Configuration + +Promtail automatically discovers Docker containers and collects logs. 
+ +Key configuration snippet: + +``` yaml +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] +``` + +**Explanation** + +- Uses **Docker service discovery** +- Only collects logs from containers labeled: + +```{=html} +<!-- --> +``` + logging=promtail + +This prevents collecting logs from unrelated containers. + +### Relabeling + +``` yaml +relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: container + + - source_labels: ['__meta_docker_container_label_app'] + target_label: app +``` + +Purpose: + +- Extract container metadata +- Create labels used in **LogQL queries** + +Resulting labels: + + container + image + container_id + app + +------------------------------------------------------------------------ + +# Loki Configuration + +Loki stores logs using filesystem storage. + +Configuration snippet: + +``` yaml +limits_config: + retention_period: 168h +``` + +Logs are retained for: + + 7 days + +### Storage backend + + filesystem + +This is suitable for **development environments**. 
+ +### Compactor + +``` yaml +compactor: + retention_enabled: true +``` + +Responsible for: + +- deleting expired logs +- compacting log chunks + +------------------------------------------------------------------------ + +# Application Logging + +The Python application implements **structured JSON logging** using: + + python-json-logger + +### Logger configuration + +``` python +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +handler = logging.StreamHandler() +formatter = jsonlogger.JsonFormatter( + "%(asctime)s %(levelname)s %(message)s %(name)s" +) +handler.setFormatter(formatter) +logger.addHandler(handler) +``` + +### Example log output + +``` json +{ + "asctime": "2026-03-12 16:47:27", + "levelname": "INFO", + "message": "HTTP request", + "method": "GET", + "path": "/health", + "status_code": 200 +} +``` + +Benefits of JSON logging: + +- machine-readable format +- easy parsing in Loki +- structured LogQL queries + +### Logged events + +The application logs: + +- application startup +- HTTP requests +- request metadata +- exceptions + +Example request log: + +![GET Requests](logs_get_requests.png) + +------------------------------------------------------------------------ + +# Dashboard + +Example dashboard panels: + +![Dashboard Panels](dashboard_panels.png) + +### Panel 1 --- All Logs + +Query: + + {app="devops-python"} + +Displays all logs from the Python application. + +### Panel 2 --- HTTP Requests + +Query: + + {app="devops-python"} | json | method="GET" + +Filters logs to show only GET requests. + +### Panel 3 --- Error Logs + +Query: + + {app="devops-python"} | json | levelname="ERROR" + +Shows only error-level logs. + +### Panel 4 --- Request Duration + +Query: + + avg_over_time({app="devops-python"} | json | unwrap duration_ms [5m]) + +Averages request latency (`duration_ms`) over 5-minute windows; `unwrap` is only valid inside a range aggregation. + +------------------------------------------------------------------------ + +# Production Configuration + +Several production practices were applied. 
+ +### Resource limits + +Each container includes CPU and memory constraints. + +Example: + +``` yaml +deploy: + resources: + limits: + cpus: '1.0' + memory: 1G +``` + +Benefits: + +- prevents resource exhaustion +- protects host stability + +### Retention policy + +Loki retains logs for: + + 168 hours (7 days) + +### Container health checks + +Example: + +``` yaml +healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"] +``` + +This lets Docker detect and report when a container becomes unhealthy. + +Healthcheck screenshot: + +![Health Check](heealth_checl.png) + +------------------------------------------------------------------------ + +# Testing + +## Verify containers + +``` bash +docker ps +``` + +Expected status: + + healthy + +## Generate application logs + +``` bash +curl http://localhost:5000 +curl http://localhost:5000/health +``` + +## Verify logs inside container + +``` bash +docker logs devops-python-app +``` + +Screenshot: + +![Container Logs](logs_container.png) + +## Query logs in Grafana + +Example LogQL: + + {app="devops-python"} | json + +Result screenshot: + +![Logs in Grafana](grafana_promtail.png) + +------------------------------------------------------------------------ + +# Challenges + +## 1. Promtail collecting logs from all containers + +**Problem** + +Promtail initially collected logs from all containers. + +**Solution** + +Added label filtering: + + logging=promtail + +------------------------------------------------------------------------ + +## 2. Application logs missing + +**Problem** + +Promtail only collects what a container writes to its stdout/stderr streams. + +**Solution** + +Configured Python logging to emit JSON logs to the container's standard streams (a bare `StreamHandler()` writes to stderr, which Docker also captures) using: + + StreamHandler() + +------------------------------------------------------------------------ + +## 3. Health check failures + +**Problem** + +Health checks failed due to missing tools like `wget` in the container. 
+ +**Solution** + +Used a Python-based health check: + +``` bash +python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')" +``` + +------------------------------------------------------------------------ + +# Evidence of Completed Tasks + +### Grafana running + +![Grafana UI](grafana_grafana.png) + +### Loki datasource + +![Loki](grafana_loki.png) + +### Promtail logs + +![Promtail](grafana_promtail.png) + +### Application logs without errors + +![No Errors](logs_no_errors.png) + +------------------------------------------------------------------------ + +# Conclusion + +This lab successfully implemented a centralized logging stack using: + +- Loki +- Promtail +- Grafana +- Structured JSON logging + +The system enables efficient log aggregation, querying, and +visualization, which are essential practices for modern **DevOps +observability pipelines**. diff --git a/monitoring/docs/LAB08.md b/monitoring/docs/LAB08.md new file mode 100644 index 0000000000..c7249f11cb --- /dev/null +++ b/monitoring/docs/LAB08.md @@ -0,0 +1,197 @@ +# Lab 08 --- Metrics Monitoring with Prometheus & Grafana + +## Architecture + +This lab extends the logging stack by adding **metrics monitoring** +using Prometheus. 
+ +**Flow:** + + FastAPI App → Prometheus → Grafana + +- FastAPI exposes `/metrics` +- Prometheus scrapes metrics every 15s +- Grafana visualizes metrics + +------------------------------------------------------------------------ + +## Application Instrumentation + +### HTTP Metrics + +- `http_requests_total` --- request count (RED: Rate) +- `http_request_duration_seconds` --- latency (RED: Duration) +- `http_requests_in_progress` --- active requests + +Labels used: `method`, `endpoint` (normalized), `status_code`. + +### Business Metrics + +- `devops_info_endpoint_calls` --- endpoint usage +- `devops_info_system_collection_seconds` --- system info time + +**Why:** these metrics enable RED-method monitoring and track performance and load. + +------------------------------------------------------------------------ + +## Prometheus Configuration + +- Scrape interval: 15s +- Retention: 15d / 10GB + +Targets: Prometheus (`localhost:9090`), App (`devops-python-app:5000`), +Loki (`loki:3100`), Grafana (`grafana:3000`). + +------------------------------------------------------------------------ + +## Dashboard Walkthrough + +### Panels + +1. **Request Rate** + +```{=html} +<!-- --> +``` + rate(http_requests_total[1m]) + +2. **Error Rate** + +```{=html} +<!-- --> +``` + rate(http_requests_total{status_code!~"2.."}[1m]) + +3. **Latency (p95)** + +```{=html} +<!-- --> +``` + histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) + +4. **Active Requests** + +```{=html} +<!-- --> +``` + http_requests_in_progress + +5. **Endpoint Usage** + +```{=html} +<!-- --> +``` + devops_info_endpoint_calls + +------------------------------------------------------------------------ + +## PromQL Examples + +1. Request rate: + +```{=html} +<!-- --> +``` + rate(http_requests_total[1m]) + +2. Error rate: + +```{=html} +<!-- --> +``` + rate(http_requests_total{status_code!~"2.."}[1m]) + +3. Latency p95: + +```{=html} +<!-- --> +``` + histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) + +4. 
Requests by endpoint: + +```{=html} +<!-- --> +``` + sum by (endpoint) (http_requests_total) + +5. Active requests: + +```{=html} +<!-- --> +``` + http_requests_in_progress + +------------------------------------------------------------------------ + +## Production Setup + +- Health checks for Loki, Grafana, Prometheus, and the application (Promtail has none) +- Resource limits (CPU/memory) +- Retention: + - Prometheus: 15 days + - Loki: 7 days + +------------------------------------------------------------------------ + +## Testing Results + +### Services healthy + +![Services](services_healthy.png) + +### Prometheus targets + +![Targets](prometheus_targets.png) + +### Metrics endpoint + +![Metrics](metrics_endpoint.png) + +### PromQL queries + +![Queries](prometheus_query.png) + +### Grafana dashboard + +![Dashboard](custom_dashboard.png) + +### Dashboard persistence + +![Persistence](dashboard_still_being_there.png) + +------------------------------------------------------------------------ + +## Challenges & Solutions + +### 1. Metrics not visible + +- Fixed by adding a `/metrics` endpoint + +### 2. High cardinality + +- Solved by normalizing endpoints + +### 3. 
Wrong service names + +- Fixed using Docker service DNS + +------------------------------------------------------------------------ + +## Comparison: Metrics vs Logs + +| Metrics | Logs | +| ----------------- | ------------------ | +| Aggregated | Detailed | +| Fast queries | Rich context | +| Good for alerting | Good for debugging | + +Use metrics for monitoring and alerts, and logs for root cause analysis. + +------------------------------------------------------------------------ + +## Evidence + +- Dashboards with live data +- PromQL queries (RED method) +- Healthy services diff --git a/monitoring/docs/custom_dashboard.png b/monitoring/docs/custom_dashboard.png new file mode 100644 index 0000000000..619a1b8e23 Binary files /dev/null and b/monitoring/docs/custom_dashboard.png differ diff --git a/monitoring/docs/dashboard_panels.png b/monitoring/docs/dashboard_panels.png new file mode 100644 index 0000000000..356f9ee89f Binary files /dev/null and b/monitoring/docs/dashboard_panels.png differ diff --git a/monitoring/docs/dashboard_still_being_there.png b/monitoring/docs/dashboard_still_being_there.png new file mode 100644 index 0000000000..24175ed084 Binary files /dev/null and b/monitoring/docs/dashboard_still_being_there.png differ diff --git a/monitoring/docs/grafana_dashboard.json b/monitoring/docs/grafana_dashboard.json new file mode 100644 index 0000000000..b0aa998e5b --- /dev/null +++ b/monitoring/docs/grafana_dashboard.json @@ -0,0 +1,669 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "12.3.1" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + 
"version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "up{job=\"app\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": 
[] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 6, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "sort": "desc", + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (status) (rate(http_requests_total[5m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Status code distribution", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "http_requests_in_progress", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active requests", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 4, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(http_request_duration_seconds_bucket[5m])", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Request duration heatmap", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + 
"w": 12, + "x": 0, + "y": 32 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Request duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + 
}, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total[5m])) by (endpoint)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Request rate", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": 
"browser", + "title": "Lab8 Dashboard", + "uid": "adq68bn", + "version": 2, + "weekStart": "" +} \ No newline at end of file diff --git a/monitoring/docs/grafana_grafana.png b/monitoring/docs/grafana_grafana.png new file mode 100644 index 0000000000..26aa788109 Binary files /dev/null and b/monitoring/docs/grafana_grafana.png differ diff --git a/monitoring/docs/grafana_login_page.png b/monitoring/docs/grafana_login_page.png new file mode 100644 index 0000000000..248aa8d987 Binary files /dev/null and b/monitoring/docs/grafana_login_page.png differ diff --git a/monitoring/docs/grafana_loki.png b/monitoring/docs/grafana_loki.png new file mode 100644 index 0000000000..53e93c5d95 Binary files /dev/null and b/monitoring/docs/grafana_loki.png differ diff --git a/monitoring/docs/grafana_promtail.png b/monitoring/docs/grafana_promtail.png new file mode 100644 index 0000000000..867d6307c6 Binary files /dev/null and b/monitoring/docs/grafana_promtail.png differ diff --git a/monitoring/docs/heealth_checl.png b/monitoring/docs/heealth_checl.png new file mode 100644 index 0000000000..d3ac08ad97 Binary files /dev/null and b/monitoring/docs/heealth_checl.png differ diff --git a/monitoring/docs/logs_container.png b/monitoring/docs/logs_container.png new file mode 100644 index 0000000000..ff149cc5a9 Binary files /dev/null and b/monitoring/docs/logs_container.png differ diff --git a/monitoring/docs/logs_get_requests.png b/monitoring/docs/logs_get_requests.png new file mode 100644 index 0000000000..e80d223c8f Binary files /dev/null and b/monitoring/docs/logs_get_requests.png differ diff --git a/monitoring/docs/logs_no_errors.png b/monitoring/docs/logs_no_errors.png new file mode 100644 index 0000000000..6aa31da284 Binary files /dev/null and b/monitoring/docs/logs_no_errors.png differ diff --git a/monitoring/docs/metrics_endpoint.png b/monitoring/docs/metrics_endpoint.png new file mode 100644 index 0000000000..459ac54524 Binary files /dev/null and b/monitoring/docs/metrics_endpoint.png 
differ diff --git a/monitoring/docs/prometheus_query.png b/monitoring/docs/prometheus_query.png new file mode 100644 index 0000000000..89435bb8b2 Binary files /dev/null and b/monitoring/docs/prometheus_query.png differ diff --git a/monitoring/docs/prometheus_targets.png b/monitoring/docs/prometheus_targets.png new file mode 100644 index 0000000000..ff53fc9d89 Binary files /dev/null and b/monitoring/docs/prometheus_targets.png differ diff --git a/monitoring/docs/services_healthy.png b/monitoring/docs/services_healthy.png new file mode 100644 index 0000000000..d1688dd563 Binary files /dev/null and b/monitoring/docs/services_healthy.png differ diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..846eec505d --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,43 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 0.0.0.0 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + +compactor: + working_directory: /loki/compactor + compaction_interval: 10m + retention_enabled: true + delete_request_store: filesystem \ No newline at end of file diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000000..ea7982d9ab --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,30 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +storage: + tsdb: + +scrape_configs: + # Prometheus itself + - job_name: 'prometheus' + 
static_configs: + - targets: ['localhost:9090'] + + # Your FastAPI app + - job_name: 'app' + metrics_path: /metrics + static_configs: + - targets: ['devops-python-app:5000'] + + # Loki metrics + - job_name: 'loki' + metrics_path: /metrics + static_configs: + - targets: ['loki:3100'] + + # Grafana metrics + - job_name: 'grafana' + metrics_path: /metrics + static_configs: + - targets: ['grafana:3000'] \ No newline at end of file diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml new file mode 100644 index 0000000000..828d1f69fd --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,36 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: container + + - source_labels: ['__meta_docker_container_image'] + target_label: image + + - source_labels: ['__meta_docker_container_id'] + target_label: container_id + + - source_labels: ['__meta_docker_container_label_app'] + target_label: app + + pipeline_stages: + - docker: {} \ No newline at end of file diff --git a/terraform/docs/LAB04.md b/terraform/docs/LAB04.md new file mode 100644 index 0000000000..d44ba955b2 --- /dev/null +++ b/terraform/docs/LAB04.md @@ -0,0 +1,23 @@ +## Cloud Provider and Infrastructure +To complete the task, a local homelab VM was set up after the incident with VK Cloud. The setup is as follows: + +1. Remote router, set up to forward connections on ports 8443, 80, 5000 and 22 +2. Windows machine, set up to forward all the connections to the virtual machine inside of it +3. Ubuntu 24.04 VM, run in VirtualBox, that accepts connections on ports 22, 80 and 5000. 
+ +To avoid blocking other ports, port numbers are remapped between the router and the final VM, so traffic passing through the Windows machine may use ports such as 2222 or 8080. + +Total cost: 1000 rubles (stolen by VK Cloud) + +Screenshot of the VM terminal being accessed via SSH: + +![SSH session on the VM](ssh_output.png) + +Since a local VM was set up and the cloud providers failed me, I did not create a setup using Terraform or Pulumi. + +### VM for Lab 5 +I am keeping my cloud/local VM up until Lab 5. +The VM is accessible on my personal homelab IP address. + +Here is proof that it is still running: +![VM status check](check_status.png) \ No newline at end of file diff --git a/terraform/docs/check_status.png b/terraform/docs/check_status.png new file mode 100644 index 0000000000..257f995a84 Binary files /dev/null and b/terraform/docs/check_status.png differ diff --git a/terraform/docs/ssh_output.png b/terraform/docs/ssh_output.png new file mode 100644 index 0000000000..4cd0c992fd Binary files /dev/null and b/terraform/docs/ssh_output.png differ