Skip to content

Commit 0040310

Browse files
committed
fix: introduce backups for disaster recovery
1 parent 418c1af commit 0040310

3 files changed

Lines changed: 127 additions & 0 deletions

File tree

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
#jinja2: trim_blocks:True, lstrip_blocks:True
# Periodic batch job that exports the mev-commit-geth chain data to an RLP
# file on the "backups-volume" host volume, for disaster recovery.
job "backup-{{ environments[env].version }}" {
  datacenters = ["{{ datacenter }}"]

  type = "batch"

  # The cron will run the job every hour. prohibit_overlap keeps a new run from
  # being launched while the previous export is still in progress.
  periodic {
    cron             = "0 * * * *"
    prohibit_overlap = true
  }

  group "backup-group" {
    count = 1

    network {
      mode = "bridge"

      # Resolvers reported by the host, with 1.1.1.1 appended as a fallback.
      dns {
        servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }}
      }
    }

    # Host volume that receives the exported backup files.
    volume "backups-volume" {
      type      = "host"
      source    = "backups-volume"
      read_only = false
    }

    task "backup" {
      driver = "exec"

      {% if env == 'devenv' %}
      resources {
        memory = 2048
      }
      {% endif %}

      volume_mount {
        volume      = "backups-volume"
        destination = "/local/backups"
        read_only   = false
      }

      # The geth binary comes from the S3 artifact bucket, except in devenv
      # where it is fetched from the host's own address on port 1111.
      {% if env != 'devenv' %}
      artifact {
        source = "https://primev-infrastructure-artifacts.s3.us-west-2.amazonaws.com/mev-commit-geth_{{ version }}_Linux_{{ target_system_architecture }}.tar.gz"
      }
      {% else %}
      artifact {
        source = "http://{{ ansible_facts['default_ipv4']['address'] }}:1111/mev-commit-geth_{{ version }}_Linux_{{ target_system_architecture }}.tar.gz"
      }
      {% endif %}

      # Environment file consumed by local/run.sh.
      # NOTE(review): GETH_DATA_DIR points below /local/data, but this job only
      # mounts the backups volume - confirm where the chain data comes from.
      # The log format falls back to "json" when the job profile has no env map.
      template {
        data = <<-EOH
          {%- raw %}
          GETH_DATA_DIR="/local/data/{% endraw %}{{ job.name }}{% raw %}/node-{{ env "NOMAD_ALLOC_INDEX" }}"
          {% endraw %}
          GETH_LOG_FORMAT="{{ (job.env | default({}, true)).get('log-format', 'json') }}"
          GETH_LOG_TAGS="{{ 'service.name:' + job.name + '-{{ env "NOMAD_ALLOC_INDEX" }}' + ',service.version:' + version }}"
        EOH
        destination = "secrets/.env"
        env         = true
      }

      # Backup script: optionally streams its output to the Datadog TCP log
      # collector, then runs the geth export and reports duration and size.
      template {
        data = <<-EOH
          #!/usr/bin/env bash

          {%- raw %}
          {{- range nomadService "datadog-agent-logs-collector" }}
          {{ if contains "tcp" .Tags }}
          exec > >(nc {{ .Address }} {{ .Port }}) 2>&1
          {{ end }}
          {{- end }}
          {% endraw %}

          BACKUP_FILE="local/backups/{{ version }}_{{ job.name }}-{% raw %}{{ env "NOMAD_ALLOC_INDEX" }}{% endraw %}_$(date +%Y%m%d%H%M%S)"
          # Mark the backup as dirty when a task event reports a non-zero exit code.
          # NOTE(review): the only task in this job is "backup", so the "node"
          # task state queried below is likely absent from this allocation and
          # the check may never fire - confirm which allocation should be inspected.
          STATUS=$(nomad alloc status -address="http://127.0.0.1:4646" -json "${NOMAD_ALLOC_ID}")
          NON_ZERO_EXIT_EVENTS=$(echo "$STATUS" | jq -r '.TaskStates.node.Events[] | select(.ExitCode != 0)')
          if [[ -n "${NON_ZERO_EXIT_EVENTS}" ]]; then
            echo "The main task did not start or finish gracefully"
            BACKUP_FILE+="-dirty"
          fi
          BACKUP_FILE+=".rlp"

          echo "Exporting chain data to backup file: ${BACKUP_FILE}"
          START_TIME=$(date +%s)
          chmod +x local/mev-commit-geth
          local/mev-commit-geth \
            --verbosity=5 \
            --log.format="${GETH_LOG_FORMAT}" \
            --log.tags="${GETH_LOG_TAGS}" \
            --datadir="${GETH_DATA_DIR}" \
            export "${BACKUP_FILE}"

          # The exit status tested here is that of the export command above;
          # do not insert any command between the two.
          if [[ "$?" -eq 0 ]] && [[ -f "${BACKUP_FILE}" ]]; then
            ELAPSED_TIME=$(($(date +%s) - START_TIME))
            echo "Backup finished in: $(date -u -d@${ELAPSED_TIME} +%H:%M:%S)"
            echo "Backup file size: $(du -h "${BACKUP_FILE}" | cut -f1)"
          else
            echo "Backup failed"
            exit 1
          fi
        EOH
        destination = "local/run.sh"
        change_mode = "noop"
        perms       = "0755"
      }

      config {
        command = "bash"
        args    = ["-c", "exec local/run.sh"]
      }
    }
  }
}

infrastructure/nomad/playbooks/templates/services/nomad.hcl.j2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ client {
3636
path = "{{ ansible_user_home }}/{{ env }}/artifacts"
3737
}
3838
{% endif %}
39+
{% if env == "testnet" or env == "mainnet" %}
40+
# Registers the per-environment "backups" directory under the Ansible user's
# home as the "backups-volume" host volume; only done for testnet and mainnet.
host_volume "backups-volume" {
41+
path = "{{ ansible_user_home }}/{{ env }}/backups"
42+
}
43+
{% endif %}
3944
}
4045
{% endif %}
4146

infrastructure/nomad/playbooks/variables/profiles.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,10 @@ jobs:
673673
to: 8080
674674
env:
675675

676+
# Backup job definition, rendered from the backups.nomad.j2 template and
# exposed through the backups_job anchor for reuse in profile job lists.
# NOTE(review): no env key is defined here - confirm the template does not require one.
backups: &backups_job
677+
name: backups
678+
template: backups.nomad.j2
679+
676680
profiles:
677681
ci:
678682
jobs:
@@ -735,6 +739,7 @@ profiles:
735739
- *mev_commit_oracle_job
736740
- *mev_commit_provider_emulator_node1_job
737741
- *datadog_agent_metrics_collector_job
742+
- *backups_job
738743

739744
stressnet:
740745
jobs:

0 commit comments

Comments
 (0)