From f0023ac033f44146b7d47cdec8235b22a2cc2f7a Mon Sep 17 00:00:00 2001
From: DevOps
Date: Sat, 20 Jun 2026 19:00:53 +0200
Subject: [PATCH] fix: use external deploy script to avoid nested quoting errors

The inner shell script run via docker:cli had complex escaping that
caused 'unterminated quoted string' errors at runtime. Moved the deploy
logic to an external script file (heredoc in the workflow YAML), mounted
read-only into the docker:cli container. Pass BUILD_ARGS and SERVICE via
environment variables instead of shell interpolation.
---
Review notes (text between "---" and the diff is not part of the commit):

* The heredoc body and its DEPLOYSCRIPT terminator are indented to the
  same column as the `cat >` line. Starting them at column 0 puts them
  left of the enclosing YAML block scalar, which ends the scalar early
  and leaves the workflow unparsable (assumes the step is a `run: |`
  block - confirm against the full file). YAML strips the block
  indentation before the shell runs, so the script text and the heredoc
  terminator still begin at column 0 at run time.
* Only leading whitespace of added lines differs, so the diffstat and
  hunk counts are unchanged; the post-image blob id on the "index" line
  no longer matches and should be regenerated.
* Indentation widths in this hunk were reconstructed (10 columns for
  step commands) - verify against the real file, or apply with
  `git apply --ignore-whitespace`.
* NOTE(review): /tmp/nexus-deploy-script.sh is bind-mounted through the
  Docker socket, so the path is resolved by the Docker daemon's host.
  Confirm the runner shares /tmp with that host; otherwise /deploy.sh
  will not be the script written here.
* NOTE(review): `pg_resetwal -f` is attempted on every deploy whenever a
  matching volume exists, although the log line says "Checking ...
  integrity". Confirm this is intended.
* NOTE(review): $BUILD_ARGS and $SERVICE are expanded unquoted, which
  word-splits them - presumably deliberate so several flags/services can
  be passed; confirm.

 .gitea/workflows/deploy.yaml | 124 +++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 58 deletions(-)

diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml
index 2f6a826..b569b48 100644
--- a/.gitea/workflows/deploy.yaml
+++ b/.gitea/workflows/deploy.yaml
@@ -201,70 +201,78 @@ jobs:
 
           SERVICE_ARG="${{ github.event_name == 'workflow_dispatch' && inputs.service || '' }}"
 
+          # Write the deploy script to a file to avoid nested quoting issues
+          cat > /tmp/nexus-deploy-script.sh << 'DEPLOYSCRIPT'
+          #!/bin/sh
+          set -e
+          trap 'rm -f /tmp/nexus-deploy-env' EXIT
+          cat > /tmp/nexus-deploy-env
+
+          # ── Clean up zombie containers ──
+          docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
+          docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true
+
+          # ── WAL recovery ──
+          PG_VOL=$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1)
+          if [ -n "$PG_VOL" ]; then
+            echo "Checking postgres WAL integrity..."
+            docker run --rm -v "$PG_VOL:/var/lib/postgresql/data" \
+              --entrypoint sh postgres:17-alpine -c "
+                echo 'Resetting WAL...'
+                pg_resetwal -f /var/lib/postgresql/data && echo 'WAL reset OK'
+              " 2>&1 || echo 'pg_resetwal failed (may be benign)'
+          else
+            echo 'Postgres volume not found - will be created fresh'
+          fi
+
+          BUILD_ARGS="${DEPLOY_BUILD_ARGS:-}"
+          SERVICE="${DEPLOY_SERVICE:-}"
+
+          if [ -n "$SERVICE" ]; then
+            echo "Deploying service: $SERVICE"
+            docker compose --env-file /tmp/nexus-deploy-env build $BUILD_ARGS $SERVICE
+            docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate $SERVICE
+          else
+            echo 'Deploying all services'
+            docker compose --env-file /tmp/nexus-deploy-env build $BUILD_ARGS
+            docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate
+          fi
+
+          echo 'Waiting for services to become healthy (up to 180s)...'
+          for i in $(seq 1 36); do
+            STATUS=$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | tail -n +2)
+            if echo "$STATUS" | grep -q 'unhealthy'; then
+              echo " [$i/36] Unhealthy containers - failing fast"
+              docker compose --env-file /tmp/nexus-deploy-env ps -a
+              docker compose --env-file /tmp/nexus-deploy-env logs --tail=30
+              exit 1
+            elif echo "$STATUS" | grep -q 'starting'; then
+              echo " [$i/36] Still starting..."
+              sleep 5
+            else
+              echo 'All containers healthy'
+              docker compose --env-file /tmp/nexus-deploy-env ps -a
+              exit 0
+            fi
+          done
+          echo 'Timeout waiting for services'
+          docker compose --env-file /tmp/nexus-deploy-env ps -a
+          docker compose --env-file /tmp/nexus-deploy-env logs --tail=20
+          exit 1
+          DEPLOYSCRIPT
+
           docker run --rm \
+            -e "DEPLOY_BUILD_ARGS=${BUILD_ARGS:-}" \
+            -e "DEPLOY_SERVICE=${SERVICE_ARG:-}" \
             -v "${DEPLOY_PATH}:/workspace/nexus" \
             -v /var/run/docker.sock:/var/run/docker.sock \
+            -v /tmp/nexus-deploy-script.sh:/deploy.sh:ro \
             -w /workspace/nexus \
             -i \
             docker:cli \
-            sh -c "
-              set -e
-              trap 'rm -f /tmp/nexus-deploy-env' EXIT
-              cat > /tmp/nexus-deploy-env
-
-              # ── Clean up zombie containers from failed deploys ──
-              docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
-              docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true
-
-              # ── WAL recovery: reset corrupt WAL that can block postgres startup ──
-              # Force-killed postgres containers can leave stale WAL entries that cause
-              # 'PANIC: could not locate a valid checkpoint record' on next start.
-              # pg_resetwal -f clears the WAL (losing uncommitted tx, which were lost anyway).
-              PG_VOL=\$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1)
-              if [ -n \"\$PG_VOL\" ]; then
-                echo '🩺 Checking postgres WAL integrity...'
-                docker run --rm -v \"\$PG_VOL:/var/lib/postgresql/data\" \
-                  --entrypoint sh postgres:17-alpine -c '
-                    echo "Resetting WAL..."
-                    pg_resetwal -f /var/lib/postgresql/data && echo \"✅ WAL reset OK\"
-                  ' 2>&1 || echo '⚠️ pg_resetwal failed — postgres may need manual intervention'
-              else
-                echo '⚠️ Postgres volume not found — will be created fresh'
-              fi
-
-              if [ -n '${SERVICE_ARG}' ]; then
-                echo '🚀 Deploying service: ${SERVICE_ARG}'
-                docker compose --env-file /tmp/nexus-deploy-env build ${BUILD_ARGS} ${SERVICE_ARG}
-                docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate ${SERVICE_ARG}
-              else
-                echo '🚀 Deploying all services'
-                docker compose --env-file /tmp/nexus-deploy-env build ${BUILD_ARGS}
-                docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate
-              fi
-              echo '⏳ Waiting for services to become healthy (up to 180s)...'
-              for i in \$(seq 1 36); do
-                UNHEALTHY=\$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | grep -E 'unhealthy' || true)
-                STARTING=\$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | grep -E 'starting' || true)
-                if [ -n \"\$UNHEALTHY\" ]; then
-                  echo \" [\$i/36] Containers unhealthy:\"
-                  echo \"\$UNHEALTHY\"
-                  echo \"Failing fast — unhealthy container detected\"
-                  docker compose --env-file /tmp/nexus-deploy-env logs --tail=30
-                  exit 1
-                elif [ -n \"\$STARTING\" ]; then
-                  echo \" [\$i/36] Still starting...\"
-                  sleep 5
-                else
-                  echo '✅ All containers healthy'
-                  docker compose --env-file /tmp/nexus-deploy-env ps -a
-                  exit 0
-                fi
-              done
-              echo '❌ Timeout waiting for services (180s)'
-              docker compose --env-file /tmp/nexus-deploy-env ps -a
-              docker compose --env-file /tmp/nexus-deploy-env logs --tail=20
-              exit 1
-            " < "${ENV_TMPFILE}"
+            sh /deploy.sh < "${ENV_TMPFILE}"
+
+          rm -f /tmp/nexus-deploy-script.sh
 
           echo "✅ Docker compose up completed"
 