From 73c5eb69d7806468fe8694603e827dcbc2afb40a Mon Sep 17 00:00:00 2001
From: DevOps
Date: Sat, 20 Jun 2026 18:57:54 +0200
Subject: [PATCH] fix: ensure zombie container cleanup before deploy + verbose pg_resetwal

Before the WAL-recovery step, run `docker compose down --remove-orphans`
and force-remove the nexus-postgres-1, nexus-api-1 and nexus-web-1
containers, so leftovers from a failed deploy are gone before the new
deploy starts. Both commands stay best-effort (`|| true`).

Stop discarding the output of the pg_resetwal helper container: its
stderr is merged into stdout, a non-zero exit prints a warning, and the
new else branch prints a notice that the postgres volume was not found.

---
Review notes (this section is ignored by `git am`):

* The added "Resetting WAL..." echo uses \" like every other double quote
  in this script. The neighbouring lines (\"\$PG_VOL:...\" and
  \"✅ WAL reset OK\") show the script is embedded in an outer
  double-quoted string, which a bare " would terminate early.
* The leading whitespace of the hunk lines below was reconstructed; the
  copy this patch was recovered from had all whitespace collapsed.
  Confirm it against deploy.yaml, or apply with
  `git apply --ignore-whitespace` and re-check the YAML indentation.
* NOTE(review): `--entrypoint sh` bypasses the image's own entrypoint, so
  the helper container presumably runs as root, and pg_resetwal refuses
  to run as root. If so, the warning branch fires on every deploy.
  Confirm before relying on this step.
* NOTE(review): `pg_resetwal -f` discards WAL. If the step is changed so
  that it really succeeds, gate it on an actual startup failure rather
  than running it on every deploy.

 .gitea/workflows/deploy.yaml | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml
index 806a1e8..2f6a826 100644
--- a/.gitea/workflows/deploy.yaml
+++ b/.gitea/workflows/deploy.yaml
@@ -212,6 +212,10 @@ jobs:
             trap 'rm -f /tmp/nexus-deploy-env' EXIT
             cat > /tmp/nexus-deploy-env

+            # ── Clean up zombie containers from failed deploys ──
+            docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
+            docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true
+
             # ── WAL recovery: reset corrupt WAL that can block postgres startup ──
             # Force-killed postgres containers can leave stale WAL entries that cause
             # 'PANIC: could not locate a valid checkpoint record' on next start.
@@ -221,8 +225,11 @@ jobs:
               echo '🩺 Checking postgres WAL integrity...'
               docker run --rm -v \"\$PG_VOL:/var/lib/postgresql/data\" \
                 --entrypoint sh postgres:17-alpine -c '
-                  pg_resetwal -f /var/lib/postgresql/data 2>/dev/null && echo \"✅ WAL reset OK\" || echo \"WAL reset not needed / benign error\"
-                ' 2>/dev/null || echo 'WAL check skipped'
+                  echo \"Resetting WAL...\"
+                  pg_resetwal -f /var/lib/postgresql/data && echo \"✅ WAL reset OK\"
+                ' 2>&1 || echo '⚠️ pg_resetwal failed — postgres may need manual intervention'
+            else
+              echo '⚠️ Postgres volume not found — will be created fresh'
             fi

             if [ -n '${SERVICE_ARG}' ]; then