Skip to content

Commit 4ea0807

Browse files
committed
Fix DNS Router crash-loop when NoizDNS tunnels fail to start
Critical bug: the router was started BEFORE tunnels, so if NoizDNS services failed (wrong binary, bad config, etc.), the router tried to connect to dead backend ports and crash-looped, taking down ALL tunnels including the 4 working ones.

Fix: reversed the order in both step_start_services and do_add_domain:
1. Start all tunnels first
2. Verify NoizDNS tunnels are actually running
3. If any NoizDNS tunnel failed, auto-remove it to protect the router
4. THEN start the DNS Router (only healthy backends)

This prevents one broken NoizDNS tunnel from killing the entire system.
1 parent 4dd0e1b commit 4ea0807

1 file changed

Lines changed: 79 additions & 36 deletions

File tree

dnstm-setup.sh

Lines changed: 79 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3856,15 +3856,61 @@ step_start_services() {
38563856
# Reload systemd to pick up any service overrides (e.g., NoizDNS binary swap)
38573857
systemctl daemon-reload 2>/dev/null || true
38583858

3859-
# Only restart router if tunnels/install changed (avoid downtime on re-runs)
3859+
# ── 1. Start tunnels FIRST (before router) ──────────────────────────────────
3860+
# The DNS Router crash-loops if any configured backend tunnel isn't running.
3861+
# So we must start all tunnels and verify they're healthy BEFORE starting the router.
3862+
3863+
# Stop router while we start tunnels (it may be running from a previous install)
38603864
if [[ "$TUNNELS_CHANGED" == "true" ]]; then
3861-
# Stop router first to ensure it picks up the new tunnel config
3862-
# (install may have started it before tunnels were created)
38633865
print_info "Stopping DNS Router (to reload tunnel config)..."
38643866
dnstm router stop 2>/dev/null || true
38653867
sleep 1
3868+
fi
3869+
3870+
echo ""
3871+
3872+
# Start all tunnels
3873+
local all_tags
3874+
all_tags=$(dnstm tunnel list 2>/dev/null | grep -o 'tag=[^ ]*' | sed 's/tag=//' || true)
3875+
if [[ -z "$all_tags" ]]; then
3876+
all_tags="slip1 dnstt1 slip-ssh dnstt-ssh"
3877+
[[ -x /usr/local/bin/noizdns-server ]] && all_tags+=" noiz1 noiz-ssh"
3878+
fi
3879+
for tag in $all_tags; do
3880+
print_info "Starting tunnel: ${tag}..."
3881+
if dnstm tunnel start --tag "$tag" 2>/dev/null; then
3882+
print_ok "Started: ${tag}"
3883+
else
3884+
if dnstm tunnel list 2>/dev/null | awk -v t="tag=${tag}" '{for(i=1;i<=NF;i++) if($i==t){print;next}}' | grep -qi "running"; then
3885+
print_ok "Already running: ${tag}"
3886+
else
3887+
print_warn "Could not start: ${tag}. Check: dnstm tunnel logs --tag ${tag}"
3888+
fi
3889+
fi
3890+
done
3891+
3892+
# ── 2. Verify NoizDNS tunnels actually started ──────────────────────────────
3893+
# If NoizDNS services failed (wrong binary, bad config, etc.), remove them
3894+
# so the DNS Router doesn't crash-loop trying to connect to dead backends.
3895+
sleep 1
3896+
for noiz_tag in noiz1 noiz-ssh; do
3897+
if dnstm tunnel list 2>/dev/null | grep -q "tag=${noiz_tag}"; then
3898+
if ! systemctl is-active --quiet "dnstm-${noiz_tag}.service" 2>/dev/null; then
3899+
print_warn "NoizDNS tunnel ${noiz_tag} failed to start — removing to protect DNS Router"
3900+
dnstm tunnel stop --tag "$noiz_tag" 2>/dev/null || true
3901+
dnstm tunnel remove --tag "$noiz_tag" 2>/dev/null || true
3902+
rm -f "/etc/systemd/system/dnstm-${noiz_tag}.service.d/10-noizdns-binary.conf" 2>/dev/null || true
3903+
rmdir "/etc/systemd/system/dnstm-${noiz_tag}.service.d" 2>/dev/null || true
3904+
systemctl daemon-reload 2>/dev/null || true
3905+
print_info "Removed ${noiz_tag} — other tunnels will work normally"
3906+
fi
3907+
fi
3908+
done
38663909

3867-
# Start router — this reads config.json which now has all tunnels
3910+
echo ""
3911+
3912+
# ── 3. Start DNS Router (now that all healthy tunnels are running) ───────────
3913+
if [[ "$TUNNELS_CHANGED" == "true" ]]; then
38683914
print_info "Starting DNS Router..."
38693915
if dnstm router start 2>/dev/null; then
38703916
print_ok "DNS Router started"
@@ -3910,28 +3956,6 @@ step_start_services() {
39103956
fi
39113957
fi
39123958

3913-
echo ""
3914-
3915-
# Start tunnels (discover all tags dynamically to support --add-domain tunnels)
3916-
local all_tags
3917-
all_tags=$(dnstm tunnel list 2>/dev/null | grep -o 'tag=[^ ]*' | sed 's/tag=//' || true)
3918-
if [[ -z "$all_tags" ]]; then
3919-
all_tags="slip1 dnstt1 slip-ssh dnstt-ssh"
3920-
[[ -x /usr/local/bin/noizdns-server ]] && all_tags+=" noiz1 noiz-ssh"
3921-
fi
3922-
for tag in $all_tags; do
3923-
print_info "Starting tunnel: ${tag}..."
3924-
if dnstm tunnel start --tag "$tag" 2>/dev/null; then
3925-
print_ok "Started: ${tag}"
3926-
else
3927-
if dnstm tunnel list 2>/dev/null | awk -v t="tag=${tag}" '{for(i=1;i<=NF;i++) if($i==t){print;next}}' | grep -qi "running"; then
3928-
print_ok "Already running: ${tag}"
3929-
else
3930-
print_warn "Could not start: ${tag}. Check: dnstm tunnel logs --tag ${tag}"
3931-
fi
3932-
fi
3933-
done
3934-
39353959
echo ""
39363960
print_info "Current tunnel status:"
39373961
echo ""
@@ -4799,21 +4823,14 @@ do_add_domain() {
47994823
echo ""
48004824

48014825
# Reload systemd to pick up any service overrides (NoizDNS binary swap)
4802-
# Must happen BEFORE router restart to ensure overrides are active
48034826
systemctl daemon-reload 2>/dev/null || true
48044827

4805-
# Restart router to pick up new tunnel config
4806-
print_info "Restarting DNS Router to load new tunnels..."
4828+
# Stop router while we start tunnels (router crash-loops if backends are dead)
4829+
print_info "Stopping DNS Router..."
48074830
dnstm router stop 2>/dev/null || true
48084831
sleep 1
4809-
if dnstm router start 2>/dev/null; then
4810-
print_ok "DNS Router restarted"
4811-
else
4812-
print_warn "DNS Router restart may have issues. Check: dnstm router logs"
4813-
fi
4814-
echo ""
48154832

4816-
# Start new tunnels (include NoizDNS if available)
4833+
# Start new tunnels FIRST (before router)
48174834
local _start_tags="$slip_tag $dnstt_tag $slip_ssh_tag $dnstt_ssh_tag"
48184835
if [[ -x /usr/local/bin/noizdns-server ]]; then
48194836
_start_tags+=" ${noiz_tag:-} ${noiz_ssh_tag:-}"
@@ -4832,6 +4849,32 @@ do_add_domain() {
48324849
fi
48334850
done
48344851

4852+
# Verify NoizDNS tunnels started — remove dead ones to protect router
4853+
sleep 1
4854+
for _ntag in ${noiz_tag:-} ${noiz_ssh_tag:-}; do
4855+
[[ -z "$_ntag" ]] && continue
4856+
if dnstm tunnel list 2>/dev/null | grep -q "tag=${_ntag}"; then
4857+
if ! systemctl is-active --quiet "dnstm-${_ntag}.service" 2>/dev/null; then
4858+
print_warn "NoizDNS tunnel ${_ntag} failed to start — removing to protect DNS Router"
4859+
dnstm tunnel stop --tag "$_ntag" 2>/dev/null || true
4860+
dnstm tunnel remove --tag "$_ntag" 2>/dev/null || true
4861+
rm -f "/etc/systemd/system/dnstm-${_ntag}.service.d/10-noizdns-binary.conf" 2>/dev/null || true
4862+
rmdir "/etc/systemd/system/dnstm-${_ntag}.service.d" 2>/dev/null || true
4863+
systemctl daemon-reload 2>/dev/null || true
4864+
print_info "Removed ${_ntag} — other tunnels will work normally"
4865+
fi
4866+
fi
4867+
done
4868+
4869+
# NOW start the router (all backends are healthy)
4870+
echo ""
4871+
print_info "Starting DNS Router..."
4872+
if dnstm router start 2>/dev/null; then
4873+
print_ok "DNS Router restarted"
4874+
else
4875+
print_warn "DNS Router restart may have issues. Check: dnstm router logs"
4876+
fi
4877+
48354878
echo ""
48364879
print_info "All tunnels:"
48374880
echo ""

0 commit comments

Comments
 (0)