fix: use external deploy script to avoid nested quoting errors
CI - Build & Test / Backend (.NET) (push) Successful in 29s
CI - Build & Test / Frontend (Vue/TS) (push) Successful in 18s
CI - Build & Test / Security Check (push) Successful in 4s

The inner shell script run via docker:cli had complex escaping
that caused 'unterminated quoted string' errors at runtime.
Moved the deploy logic to an external script file (heredoc in
the workflow YAML), mounted read-only into the docker:cli
container. Pass BUILD_ARGS and SERVICE via environment
variables instead of shell interpolation.
This commit is contained in:
2026-06-20 19:00:53 +02:00
parent 73c5eb69d7
commit f0023ac033
+66 -58
View File
@@ -201,70 +201,78 @@ jobs:
SERVICE_ARG="${{ github.event_name == 'workflow_dispatch' && inputs.service || '' }}" SERVICE_ARG="${{ github.event_name == 'workflow_dispatch' && inputs.service || '' }}"
# Write the deploy script to a file to avoid nested quoting issues.
# The delimiter is quoted ('DEPLOYSCRIPT'), so the body is written literally:
# nothing below is expanded by the shell that creates the file.
cat > /tmp/nexus-deploy-script.sh << 'DEPLOYSCRIPT'
#!/bin/sh
# Deploys the compose project in the current working directory.
#
# Input:
#   stdin              contents of the compose env file
#   DEPLOY_BUILD_ARGS  extra arguments for `docker compose build` (may be empty)
#   DEPLOY_SERVICE     service to deploy; empty means all services
# Exit status:
#   0  containers were listed and none is reported as unhealthy/starting
#   1  an unhealthy container was seen, or the wait timed out
set -e

# The env file is removed on every exit path.
trap 'rm -f /tmp/nexus-deploy-env' EXIT
# The env file arrives on stdin; persist it so every compose call can read it.
cat > /tmp/nexus-deploy-env

# ── Clean up zombie containers ──
# Best effort: failures here are deliberately ignored.
docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true

# ── WAL recovery ──
# NOTE(review): despite the "Checking" message, pg_resetwal -f is attempted
# unconditionally whenever a matching volume exists.
# NOTE(review): with --entrypoint sh the command presumably runs as root, and
# pg_resetwal refuses to run as root — confirm this step can ever succeed.
PG_VOL=$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1)
if [ -n "$PG_VOL" ]; then
  echo "Checking postgres WAL integrity..."
  docker run --rm -v "$PG_VOL:/var/lib/postgresql/data" \
    --entrypoint sh postgres:17-alpine -c "
echo 'Resetting WAL...'
pg_resetwal -f /var/lib/postgresql/data && echo 'WAL reset OK'
" 2>&1 || echo 'pg_resetwal failed (may be benign)'
else
  echo 'Postgres volume not found - will be created fresh'
fi

BUILD_ARGS="${DEPLOY_BUILD_ARGS:-}"
SERVICE="${DEPLOY_SERVICE:-}"

# $BUILD_ARGS and $SERVICE are left unquoted on purpose so that they
# word-split into separate arguments (and vanish when empty).
if [ -n "$SERVICE" ]; then
  echo "Deploying service: $SERVICE"
  docker compose --env-file /tmp/nexus-deploy-env build $BUILD_ARGS $SERVICE
  docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate $SERVICE
else
  echo 'Deploying all services'
  docker compose --env-file /tmp/nexus-deploy-env build $BUILD_ARGS
  docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate
fi

# Poll up to 36 times with a 5s pause between attempts (180s in total).
echo 'Waiting for services to become healthy (up to 180s)...'
for i in $(seq 1 36); do
  # tail -n +2 drops the header row of the ps table.
  STATUS=$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | tail -n +2)
  if [ -z "$STATUS" ]; then
    # No rows at all (ps failed or listed nothing): that is not proof of
    # health, so keep polling instead of reporting success.
    echo " [$i/36] No container status available yet..."
    sleep 5
  elif echo "$STATUS" | grep -q 'unhealthy'; then
    echo " [$i/36] Unhealthy containers - failing fast"
    docker compose --env-file /tmp/nexus-deploy-env ps -a
    docker compose --env-file /tmp/nexus-deploy-env logs --tail=30
    exit 1
  elif echo "$STATUS" | grep -q 'starting'; then
    echo " [$i/36] Still starting..."
    sleep 5
  else
    # Only 'unhealthy' and 'starting' are matched above; rows in any other
    # state end up here.
    echo 'All containers healthy'
    docker compose --env-file /tmp/nexus-deploy-env ps -a
    exit 0
  fi
done

echo 'Timeout waiting for services'
docker compose --env-file /tmp/nexus-deploy-env ps -a
docker compose --env-file /tmp/nexus-deploy-env logs --tail=20
exit 1
DEPLOYSCRIPT
docker run --rm \ docker run --rm \
-e "DEPLOY_BUILD_ARGS=${BUILD_ARGS:-}" \
-e "DEPLOY_SERVICE=${SERVICE_ARG:-}" \
-v "${DEPLOY_PATH}:/workspace/nexus" \ -v "${DEPLOY_PATH}:/workspace/nexus" \
-v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/docker.sock:/var/run/docker.sock \
-v /tmp/nexus-deploy-script.sh:/deploy.sh:ro \
-w /workspace/nexus \ -w /workspace/nexus \
-i \ -i \
docker:cli \ docker:cli \
sh -c " sh /deploy.sh < "${ENV_TMPFILE}"
set -e
trap 'rm -f /tmp/nexus-deploy-env' EXIT rm -f /tmp/nexus-deploy-script.sh
cat > /tmp/nexus-deploy-env
# ── Clean up zombie containers from failed deploys ──
docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true
# ── WAL recovery: reset corrupt WAL that can block postgres startup ──
# Force-killed postgres containers can leave stale WAL entries that cause
# 'PANIC: could not locate a valid checkpoint record' on next start.
# pg_resetwal -f clears the WAL (losing uncommitted tx, which were lost anyway).
PG_VOL=\$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1)
if [ -n \"\$PG_VOL\" ]; then
echo '🩺 Checking postgres WAL integrity...'
docker run --rm -v \"\$PG_VOL:/var/lib/postgresql/data\" \
--entrypoint sh postgres:17-alpine -c '
echo "Resetting WAL..."
pg_resetwal -f /var/lib/postgresql/data && echo \"✅ WAL reset OK\"
' 2>&1 || echo '⚠️ pg_resetwal failed — postgres may need manual intervention'
else
echo '⚠️ Postgres volume not found — will be created fresh'
fi
if [ -n '${SERVICE_ARG}' ]; then
echo '🚀 Deploying service: ${SERVICE_ARG}'
docker compose --env-file /tmp/nexus-deploy-env build ${BUILD_ARGS} ${SERVICE_ARG}
docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate ${SERVICE_ARG}
else
echo '🚀 Deploying all services'
docker compose --env-file /tmp/nexus-deploy-env build ${BUILD_ARGS}
docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate
fi
echo '⏳ Waiting for services to become healthy (up to 180s)...'
for i in \$(seq 1 36); do
UNHEALTHY=\$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | grep -E 'unhealthy' || true)
STARTING=\$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | grep -E 'starting' || true)
if [ -n \"\$UNHEALTHY\" ]; then
echo \" [\$i/36] Containers unhealthy:\"
echo \"\$UNHEALTHY\"
echo \"Failing fast — unhealthy container detected\"
docker compose --env-file /tmp/nexus-deploy-env logs --tail=30
exit 1
elif [ -n \"\$STARTING\" ]; then
echo \" [\$i/36] Still starting...\"
sleep 5
else
echo '✅ All containers healthy'
docker compose --env-file /tmp/nexus-deploy-env ps -a
exit 0
fi
done
echo '❌ Timeout waiting for services (180s)'
docker compose --env-file /tmp/nexus-deploy-env ps -a
docker compose --env-file /tmp/nexus-deploy-env logs --tail=20
exit 1
" < "${ENV_TMPFILE}"
echo "✅ Docker compose up completed" echo "✅ Docker compose up completed"