fix: ensure zombie container cleanup before deploy + verbose pg_resetwal

2026-06-20 18:57:54 +02:00
parent 06eac66baa
commit 73c5eb69d7
1 changed files with 9 additions and 2 deletions
@@ -212,6 +212,10 @@ jobs:
              trap 'rm -f /tmp/nexus-deploy-env' EXIT
              cat > /tmp/nexus-deploy-env

+              # ── Best-effort removal of leftover containers from failed deploys (compose stack + orphans, then by name); errors are ignored ──
+              docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
+              docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true
+
              # ── WAL recovery: reset corrupt WAL that can block postgres startup ──
              # Force-killed postgres containers can leave stale WAL entries that cause
              # 'PANIC: could not locate a valid checkpoint record' on next start.
@@ -221,8 +225,11 @@ jobs:
                echo '🩺 Checking postgres WAL integrity...'
                docker run --rm -v \"\$PG_VOL:/var/lib/postgresql/data\" \
                  --entrypoint sh postgres:17-alpine -c '
-                    pg_resetwal -f /var/lib/postgresql/data 2>/dev/null && echo \"✅ WAL reset OK\" || echo \"WAL reset not needed / benign error\"
-                  ' 2>/dev/null || echo 'WAL check skipped'
+                    echo \"Resetting WAL...\"
+                    pg_resetwal -f /var/lib/postgresql/data && echo \"✅ WAL reset OK\"
+                  ' 2>&1 || echo '⚠️ pg_resetwal failed — postgres may need manual intervention'
+              else
+                echo '⚠️ Postgres volume not found — will be created fresh'
              fi

              if [ -n '${SERVICE_ARG}' ]; then