Skip to content

Commit 68369ab

Browse files
authored
Overlay prod snapshots on non-prod sites (#2)
1 parent a0feba2 commit 68369ab

14 files changed

Lines changed: 371 additions & 37 deletions

File tree

README.md

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@ Deploy a docker compose project to a Google Cloud Compute Instance.
1616
|------|---------|
1717
| <a name="provider_cloudinit"></a> [cloudinit](#provider\_cloudinit) | 2.3.7 |
1818
| <a name="provider_google"></a> [google](#provider\_google) | 7.12.0 |
19+
| <a name="provider_time"></a> [time](#provider\_time) | n/a |
1920

2021
## Modules
2122

2223
| Name | Source | Version |
2324
|------|--------|---------|
24-
| <a name="module_ppb"></a> [ppb](#module\_ppb) | git::https://github.com/libops/terraform-cloudrun-v2 | 0.4.0 |
25+
| <a name="module_ppb"></a> [ppb](#module\_ppb) | git::https://github.com/libops/terraform-cloudrun-v2 | 0.5.0 |
2526

2627
## Resources
2728

@@ -30,7 +31,13 @@ Deploy a docker compose project to a Google Cloud Compute Instance.
3031
| [google_artifact_registry_repository_iam_member.private-policy-cloud-compose](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/artifact_registry_repository_iam_member) | resource |
3132
| [google_compute_disk.boot](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource |
3233
| [google_compute_disk.data](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource |
34+
| [google_compute_disk.docker-volumes](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource |
35+
| [google_compute_disk.overlay_disk](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource |
36+
| [google_compute_disk_resource_policy_attachment.daily_snapshot](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk_resource_policy_attachment) | resource |
37+
| [google_compute_disk_resource_policy_attachment.weekly_snapshot](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk_resource_policy_attachment) | resource |
3338
| [google_compute_instance.cloud-compose](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance) | resource |
39+
| [google_compute_resource_policy.daily_snapshot](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy) | resource |
40+
| [google_compute_resource_policy.weekly_snapshot](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy) | resource |
3441
| [google_project_iam_member.gce-start](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
3542
| [google_project_iam_member.gce-suspend](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
3643
| [google_project_iam_member.log](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
@@ -43,7 +50,9 @@ Deploy a docker compose project to a Google Cloud Compute Instance.
4350
| [google_service_account_iam_member.gsa-user](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/service_account_iam_member) | resource |
4451
| [google_service_account_iam_member.internal-services-keys](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/service_account_iam_member) | resource |
4552
| [google_service_account_iam_member.token-creator](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/service_account_iam_member) | resource |
53+
| [time_static.snapshot_time_static](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/static) | resource |
4654
| [cloudinit_config.ci](https://registry.terraform.io/providers/hashicorp/cloudinit/latest/docs/data-sources/config) | data source |
55+
| [google_compute_snapshot.latest_prod](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_snapshot) | data source |
4756
| [google_project_iam_custom_role.gce-start](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/project_iam_custom_role) | data source |
4857
| [google_project_iam_custom_role.gce-suspend](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/project_iam_custom_role) | data source |
4958

@@ -58,24 +67,24 @@ Deploy a docker compose project to a Google Cloud Compute Instance.
5867
| <a name="input_allowed_ips"></a> [allowed\_ips](#input\_allowed\_ips) | CIDR IP Addresses allowed to turn on this site's GCP instance | `list(string)` | `[]` | no |
5968
| <a name="input_allowed_ssh_ipv4"></a> [allowed\_ssh\_ipv4](#input\_allowed\_ssh\_ipv4) | CIDR IPv4 Addresses allowed to SSH into this site's GCP instance | `list(string)` | `[]` | no |
6069
| <a name="input_allowed_ssh_ipv6"></a> [allowed\_ssh\_ipv6](#input\_allowed\_ssh\_ipv6) | CIDR IPv6 Addresses allowed to SSH into this site's GCP instance | `list(string)` | `[]` | no |
61-
| <a name="input_disk_size_gb"></a> [disk\_size\_gb](#input\_disk\_size\_gb) | Data disk size in GB | `number` | `25` | no |
70+
| <a name="input_disk_size_gb"></a> [disk\_size\_gb](#input\_disk\_size\_gb) | Data disk size in GB | `number` | `50` | no |
6271
| <a name="input_docker_compose_branch"></a> [docker\_compose\_branch](#input\_docker\_compose\_branch) | git branch to checkout for var.docker\_compose\_repo | `string` | `"main"` | no |
6372
| <a name="input_docker_compose_down"></a> [docker\_compose\_down](#input\_docker\_compose\_down) | Command to stop the docker compose project | `string` | `"docker compose down"` | no |
6473
| <a name="input_docker_compose_init"></a> [docker\_compose\_init](#input\_docker\_compose\_init) | After cloning the docker compose git repo, any initialization that needs to happen before the docker compose project can start | `string` | `""` | no |
6574
| <a name="input_docker_compose_up"></a> [docker\_compose\_up](#input\_docker\_compose\_up) | Command to start the docker compose project | `string` | `"docker compose up --remove-orphans"` | no |
66-
| <a name="input_machine_type"></a> [machine\_type](#input\_machine\_type) | VM machine type | `string` | `"e2-medium"` | no |
75+
| <a name="input_machine_type"></a> [machine\_type](#input\_machine\_type) | VM machine type (General-purpose series that support Hyperdisk Balanced) | `string` | `"n4-standard-2"` | no |
6776
| <a name="input_os"></a> [os](#input\_os) | The host OS to install on the GCP instance | `string` | `"cos-125-19216-104-25"` | no |
68-
| <a name="input_region"></a> [region](#input\_region) | GCP region for resources | `string` | `"us-central1"` | no |
69-
| <a name="input_zone"></a> [zone](#input\_zone) | GCP zone for resources | `string` | `"us-central1-f"` | no |
77+
| <a name="input_overlay_source_instance"></a> [overlay\_source\_instance](#input\_overlay\_source\_instance) | Name of production instance to get latest snapshot from (e.g., 'ojs-production'). Terraform will automatically use the most recent snapshot from this instance's data disk. Leave empty for production environments. | `string` | `""` | no |
78+
| <a name="input_region"></a> [region](#input\_region) | GCP region for resources | `string` | `"us-east5"` | no |
79+
| <a name="input_run_snapshots"></a> [run\_snapshots](#input\_run\_snapshots) | Enable daily snapshots of the data disk (recommended for production). Last seven days of snapshots are available. Also weekly snapshots for past year. | `bool` | `false` | no |
80+
| <a name="input_volume_names"></a> [volume\_names](#input\_volume\_names) | List of docker volumes to overlay from production snapshot (e.g., ['compose\_ojs-public']). Production data is mounted read-only as lower layer, staging writes go to upper layer. | `list(string)` | `[]` | no |
81+
| <a name="input_zone"></a> [zone](#input\_zone) | GCP zone for resources | `string` | `"us-east5-b"` | no |
7082

7183
## Outputs
7284

7385
| Name | Description |
7486
|------|-------------|
7587
| <a name="output_appGsa"></a> [appGsa](#output\_appGsa) | The Google Service Account the app can leverage to auth to other Google services |
76-
| <a name="output_instanceGsa"></a> [instanceGsa](#output\_instanceGsa) | The Google Service Account the compute instance runs as |
77-
| <a name="output_instance_id"></a> [instance\_id](#output\_instance\_id) | n/a |
78-
| <a name="output_name"></a> [name](#output\_name) | n/a |
88+
| <a name="output_instance"></a> [instance](#output\_instance) | The Google Compute instance ID, name, zone, data disk, GSA for the instance. |
7989
| <a name="output_serviceGsa"></a> [serviceGsa](#output\_serviceGsa) | The Google Service Account internal services that manage the VM runs as |
80-
| <a name="output_zone"></a> [zone](#output\_zone) | n/a |
8190
<!-- END_TF_DOCS -->

examples/ojs/README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# OJS
2+
3+
Deploy OJS to Google Cloud
4+
5+
## Usage
6+
7+
Create the production VM
8+
9+
```
10+
terraform init
11+
terraform apply -target=module.production
12+
```
13+
14+
The staging VM then relies on a snapshot of the public files docker volume. This is to allow staging to mirror production.
15+
16+
You therefore need to either wait until the snapshot schedule executes (~1h) or take a snapshot of the production docker volume disk immediately. After that, the rest of the Terraform run can execute as normal:
17+
18+
```
19+
terraform apply
20+
```

examples/ojs/main.tf

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
resource "random_shuffle" "zone" {
2+
input = var.region == "us-central1" ? ["a", "b", "c", "f"] : ["a", "b", "c"]
3+
result_count = 1
4+
}
5+
6+
module "production" {
7+
source = "git::https://github.com/libops/cloud-compose?ref=0.0.1"
8+
9+
name = "ojs-production"
10+
project_id = var.project_id
11+
project_number = var.project_number
12+
docker_compose_repo = var.docker_compose_repo
13+
docker_compose_init = var.docker_compose_init
14+
region = var.region
15+
zone = format("%s-%s", var.region, random_shuffle.zone.result[0])
16+
run_snapshots = true
17+
allowed_ips = var.allowed_ips
18+
}
19+
20+
module "staging" {
21+
source = "git::https://github.com/libops/cloud-compose?ref=0.0.1"
22+
23+
name = "ojs-staging"
24+
project_id = var.project_id
25+
project_number = var.project_number
26+
docker_compose_repo = var.docker_compose_repo
27+
docker_compose_init = var.docker_compose_init
28+
region = var.region
29+
zone = format("%s-%s", var.region, random_shuffle.zone.result[0])
30+
disk_size_gb = 20
31+
allowed_ips = var.allowed_ips
32+
33+
# make production public files available in staging
34+
overlay_source_instance = "ojs-production"
35+
volume_names = [
36+
"compose_ojs-public",
37+
"compose_ojs-files"
38+
]
39+
}

examples/ojs/terraform.tfvars

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
docker_compose_repo = "https://github.com/libops/ojs"
2+
docker_compose_init = "docker compose up init"

examples/ojs/variables.tf

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
variable "project_id" {
2+
description = "The GCP project ID"
3+
type = string
4+
}
5+
6+
variable "project_number" {
7+
type = string
8+
description = "The GCP project number"
9+
}
10+
11+
variable "region" {
12+
description = "GCP region for resources"
13+
type = string
14+
default = "us-central1"
15+
}
16+
17+
variable "docker_compose_repo" {
18+
type = string
19+
description = "git repo to checkout that contains a docker compose project"
20+
}
21+
22+
variable "docker_compose_init" {
23+
type = string
24+
default = ""
25+
description = "After cloning the docker compose git repo, any initialization that needs to happen before the docker compose project can start"
26+
}
27+
28+
variable "allowed_ips" {
29+
type = list(string)
30+
default = []
31+
description = "CIDR IP Addresses allowed to turn on this site's GCP instance"
32+
}

main.tf

Lines changed: 144 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ provider "google" {
1414
region = var.region
1515
}
1616

17+
resource "time_static" "snapshot_time_static" {}
18+
1719
locals {
1820
rootFs = "${path.module}/rootfs"
1921
write_files_content = join("\n", [
@@ -55,11 +57,20 @@ EOT
5557
DOCKER_COMPOSE_REPO="${var.docker_compose_repo}"
5658
DOCKER_COMPOSE_BRANCH="${var.docker_compose_branch}"
5759
EOT
60+
use_overlay = length(var.volume_names) > 0
61+
prod_disk_name = var.overlay_source_instance != "" ? format("%s-data-disk", var.overlay_source_instance) : ""
62+
prod_disk_url = var.overlay_source_instance != "" ? format("https://www.googleapis.com/compute/v1/projects/%s/zones/%s/disks/%s-docker-volumes", var.project_id, var.zone, var.overlay_source_instance) : ""
5863
cloud_init_yaml = templatefile("${path.module}/templates/cloud-init.yml", {
5964
WRITE_FILES_CONTENT = local.write_files_content,
6065
DOCKER_COMPOSE_SCRIPTS = local.docker_compose_scripts,
6166
ENV_FILE_CONTENT = local.env_file_content,
67+
USE_OVERLAY = local.use_overlay,
68+
DOCKER_VOLUME_OVERLAYS = var.volume_names,
6269
})
70+
71+
# have the prod snapshot start at the top of the hour of the initial run
72+
# so non-prod environments can have a snapshot disk to overlay
73+
snapshot_start_time = formatdate("h:00", time_static.snapshot_time_static.rfc3339)
6374
}
6475

6576
data "cloudinit_config" "ci" {
@@ -70,7 +81,7 @@ data "cloudinit_config" "ci" {
7081
}
7182

7283
resource "google_service_account" "cloud-compose" {
73-
account_id = format("cloud-compose-%s", var.name)
84+
account_id = format("vm-%s", var.name)
7485
project = var.project_id
7586
}
7687

@@ -109,7 +120,7 @@ resource "google_compute_disk" "boot" {
109120
project = var.project_id
110121
type = "hyperdisk-balanced"
111122
zone = var.zone
112-
size = 20
123+
size = 15
113124
image = "projects/cos-cloud/global/images/${var.os}"
114125
physical_block_size_bytes = 4096
115126
}
@@ -119,11 +130,121 @@ resource "google_compute_disk" "data" {
119130
project = var.project_id
120131
type = "hyperdisk-balanced"
121132
zone = var.zone
133+
size = 20
134+
image = "debian-13-trixie-v20251111"
135+
physical_block_size_bytes = 4096
136+
}
137+
138+
resource "google_compute_disk" "docker-volumes" {
139+
name = format("%s-docker-volumes", var.name)
140+
project = var.project_id
141+
type = "hyperdisk-balanced"
142+
zone = var.zone
122143
size = var.disk_size_gb
123144
image = "debian-13-trixie-v20251111"
124145
physical_block_size_bytes = 4096
125146
}
126147

148+
149+
# Daily snapshot schedule for production docker volume disk
150+
resource "google_compute_resource_policy" "daily_snapshot" {
151+
count = var.run_snapshots ? 1 : 0
152+
name = format("%s-daily-snapshot", var.name)
153+
project = var.project_id
154+
region = var.region
155+
156+
snapshot_schedule_policy {
157+
schedule {
158+
daily_schedule {
159+
days_in_cycle = 1
160+
start_time = local.snapshot_start_time
161+
}
162+
}
163+
164+
retention_policy {
165+
max_retention_days = 7
166+
on_source_disk_delete = "KEEP_AUTO_SNAPSHOTS"
167+
}
168+
169+
snapshot_properties {
170+
labels = {
171+
managed_by = "terraform"
172+
instance = var.name
173+
}
174+
storage_locations = [var.region]
175+
guest_flush = false
176+
}
177+
}
178+
}
179+
resource "google_compute_disk_resource_policy_attachment" "daily_snapshot" {
180+
count = var.run_snapshots ? 1 : 0
181+
name = google_compute_resource_policy.daily_snapshot[0].name
182+
disk = google_compute_disk.docker-volumes.name
183+
project = var.project_id
184+
zone = var.zone
185+
}
186+
187+
resource "google_compute_resource_policy" "weekly_snapshot" {
188+
count = var.run_snapshots ? 1 : 0
189+
name = format("%s-weekly-snapshot", var.name)
190+
project = var.project_id
191+
region = var.region
192+
193+
snapshot_schedule_policy {
194+
schedule {
195+
weekly_schedule {
196+
day_of_weeks {
197+
day = "SUNDAY"
198+
start_time = "01:00"
199+
}
200+
}
201+
}
202+
203+
retention_policy {
204+
max_retention_days = 365
205+
on_source_disk_delete = "KEEP_AUTO_SNAPSHOTS"
206+
}
207+
208+
snapshot_properties {
209+
storage_locations = [var.region]
210+
guest_flush = false
211+
}
212+
}
213+
}
214+
215+
resource "google_compute_disk_resource_policy_attachment" "weekly_snapshot" {
216+
count = var.run_snapshots ? 1 : 0
217+
name = google_compute_resource_policy.weekly_snapshot[0].name
218+
disk = google_compute_disk.docker-volumes.name
219+
project = var.project_id
220+
zone = var.zone
221+
}
222+
223+
# Get the latest snapshot of the production instance's docker-volumes disk
224+
data "google_compute_snapshot" "latest_prod" {
225+
count = local.use_overlay ? 1 : 0
226+
project = var.project_id
227+
228+
# Filter to snapshots of the production docker-volumes disk, get most recent
229+
most_recent = true
230+
filter = "sourceDisk eq ${local.prod_disk_url}"
231+
}
232+
233+
# Restore production snapshot to a staging-specific disk for overlays
234+
resource "google_compute_disk" "overlay_disk" {
235+
count = local.use_overlay ? 1 : 0
236+
name = format("%s-overlay-disk", var.name)
237+
project = var.project_id
238+
type = "hyperdisk-balanced"
239+
zone = var.zone
240+
snapshot = data.google_compute_snapshot.latest_prod[0].self_link
241+
physical_block_size_bytes = 4096
242+
243+
lifecycle {
244+
create_before_destroy = true
245+
}
246+
}
247+
127248
resource "google_compute_instance" "cloud-compose" {
128249
name = var.name
129250
project = var.project_id
@@ -142,6 +263,21 @@ resource "google_compute_instance" "cloud-compose" {
142263
device_name = "data"
143264
source = google_compute_disk.data.self_link
144265
}
266+
attached_disk {
267+
device_name = "docker-volumes"
268+
source = google_compute_disk.docker-volumes.self_link
269+
}
270+
271+
dynamic "attached_disk" {
272+
for_each = local.use_overlay ? [1] : []
273+
content {
274+
device_name = "prod-volumes"
275+
source = google_compute_disk.overlay_disk[0].self_link
276+
# hyperdisk needs to be attached rw
277+
# even though we're setting this as lowerdir read only
278+
mode = "READ_WRITE"
279+
}
280+
}
145281

146282
metadata = {
147283
google-logging-enabled = "true"
@@ -182,6 +318,10 @@ resource "google_compute_instance" "cloud-compose" {
182318
enable_secure_boot = "true"
183319
enable_vtpm = "true"
184320
}
321+
322+
lifecycle {
323+
create_before_destroy = false
324+
}
185325
}
186326

187327
# machine needs to be able to suspend itself
@@ -196,7 +336,7 @@ data "google_project_iam_custom_role" "gce-suspend" {
196336
# =============================================================================
197337

198338
resource "google_service_account" "internal-services" {
199-
account_id = format("internal-services-%s", var.name)
339+
account_id = format("internal-%s", var.name)
200340
project = var.project_id
201341
}
202342

@@ -286,7 +426,7 @@ resource "google_service_account" "ppb" {
286426
}
287427

288428
module "ppb" {
289-
source = "git::https://github.com/libops/terraform-cloudrun-v2?ref=0.4.0"
429+
source = "git::https://github.com/libops/terraform-cloudrun-v2?ref=0.5.0"
290430

291431
name = var.name
292432
project = var.project_id

0 commit comments

Comments
 (0)