fix: use external deploy script to avoid nested quoting errors

The inner shell script run via docker:cli relied on complex nested escaping, which caused 'unterminated quoted string' errors at runtime. The deploy logic now lives in a standalone script file, written from a heredoc in the workflow YAML and mounted read-only into the docker:cli container. BUILD_ARGS and SERVICE are passed as environment variables instead of being interpolated into the shell string.
2026-06-20 19:00:53 +02:00
parent 73c5eb69d7
commit f0023ac033
1 changed files with 66 additions and 58 deletions
@@ -201,70 +201,78 @@ jobs:
          # Target a single service only on manual (workflow_dispatch) runs that supply
          # the `service` input; for any other trigger the expression yields an empty string.
          SERVICE_ARG="${{ github.event_name == 'workflow_dispatch' && inputs.service || '' }}"
          # Write the deploy script to a file to avoid nested quoting issues
          # (the quoted heredoc delimiter keeps the body literal: nothing in it is
          # expanded by this shell, only by the shell that later runs the script)
          cat > /tmp/nexus-deploy-script.sh << 'DEPLOYSCRIPT'
 #!/bin/sh
 # Deploy script, executed as /deploy.sh inside the docker:cli container.
 #
 # Inputs:
 #   stdin              contents of the compose env file (saved to $ENV_FILE)
 #   DEPLOY_BUILD_ARGS  optional extra arguments for `docker compose build`
 #   DEPLOY_SERVICE     optional service name; empty means "all services"
 # Exit status:
 #   0  no container is unhealthy, crashed or still starting
 #   1  a container is unhealthy / exited non-zero, or the 180s wait expired
 set -e
 ENV_FILE=/tmp/nexus-deploy-env
 # Remove the env file on every exit path, including failures under set -e.
 trap 'rm -f "$ENV_FILE"' EXIT
 # The env file arrives on stdin so it never has to appear in argv.
 cat > "$ENV_FILE"
 # Run docker compose against the env file received on stdin.
 compose() {
   docker compose --env-file "$ENV_FILE" "$@"
 }
 # Print the container list and recent logs, then abort the deploy.
 # $1 - number of log lines to show per container
 fail_with_diagnostics() {
   compose ps -a
   compose logs --tail="$1"
   exit 1
 }
 # ── Clean up zombie containers ──
 # Best effort: errors are ignored because nothing may be running yet.
 compose down --remove-orphans 2>/dev/null || true
 docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true
 # ── WAL recovery ──
 # NOTE(review): this resets the WAL on every deploy in which the volume exists;
 # no integrity check is performed first, despite the log message. Confirm that
 # an unconditional `pg_resetwal -f` is really intended.
 # NOTE(review): with the entrypoint overridden the container presumably runs as
 # root, and pg_resetwal is documented to refuse running as root - verify that
 # this step can succeed at all.
 PG_VOL=$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1)
 if [ -n "$PG_VOL" ]; then
   echo "Checking postgres WAL integrity..."
   docker run --rm -v "$PG_VOL:/var/lib/postgresql/data" \
     --entrypoint sh postgres:17-alpine -c "
       echo 'Resetting WAL...'
       pg_resetwal -f /var/lib/postgresql/data && echo 'WAL reset OK'
     " 2>&1 || echo 'pg_resetwal failed (may be benign)'
 else
   echo 'Postgres volume not found - will be created fresh'
 fi
 BUILD_ARGS="${DEPLOY_BUILD_ARGS:-}"
 SERVICE="${DEPLOY_SERVICE:-}"
 # BUILD_ARGS and SERVICE are deliberately unquoted below: an empty value must
 # vanish and multiple build flags must split into separate arguments.
 if [ -n "$SERVICE" ]; then
   echo "Deploying service: $SERVICE"
   # shellcheck disable=SC2086
   compose build $BUILD_ARGS $SERVICE
   # shellcheck disable=SC2086
   compose up -d --force-recreate $SERVICE
 else
   echo 'Deploying all services'
   # shellcheck disable=SC2086
   compose build $BUILD_ARGS
   compose up -d --force-recreate
 fi
 echo 'Waiting for services to become healthy (up to 180s)...'
 # 36 polls x 5s sleep = 180s.
 for i in $(seq 1 36); do
   # Drop the table header; only the container rows are inspected.
   STATUS=$(compose ps -a 2>/dev/null | tail -n +2)
   if printf '%s\n' "$STATUS" | grep -q 'unhealthy'; then
     echo "  [$i/36] Unhealthy containers - failing fast"
     fail_with_diagnostics 30
   elif printf '%s\n' "$STATUS" | grep -Eqi 'exited \([1-9][0-9]*\)'; then
     # A container that already died with a non-zero code will never become
     # healthy; without this check it would fall through to the success branch.
     echo "  [$i/36] Containers exited with an error - failing fast"
     fail_with_diagnostics 30
   elif [ -z "$STATUS" ]; then
     # An empty listing means compose reported nothing (or failed with its
     # stderr suppressed); that is not evidence of a healthy deployment.
     echo "  [$i/36] No containers reported yet..."
     sleep 5
   elif printf '%s\n' "$STATUS" | grep -q 'starting'; then
     echo "  [$i/36] Still starting..."
     sleep 5
   else
     echo 'All containers healthy'
     compose ps -a
     exit 0
   fi
 done
 echo 'Timeout waiting for services'
 fail_with_diagnostics 20
 DEPLOYSCRIPT
          # Run the generated deploy script inside a throwaway (--rm) docker:cli container:
          #   - DEPLOY_BUILD_ARGS / DEPLOY_SERVICE hand the build flags and target service
          #     to the script through the environment instead of string interpolation
          #   - DEPLOY_PATH is mounted at /workspace/nexus and used as the working directory
          #   - /var/run/docker.sock is mounted so docker commands in the script reach the
          #     daemon behind that socket
          #   - the script written above is mounted read-only at /deploy.sh
          #   - -i keeps stdin open; the env file is streamed in from ENV_TMPFILE
          # NOTE(review): if this step runs with errexit, a failing docker run skips the
          # rm of /tmp/nexus-deploy-script.sh below — confirm whether cleanup belongs in a trap.
          docker run --rm \
            -e "DEPLOY_BUILD_ARGS=${BUILD_ARGS:-}" \
            -e "DEPLOY_SERVICE=${SERVICE_ARG:-}" \
            -v "${DEPLOY_PATH}:/workspace/nexus" \
            -v /var/run/docker.sock:/var/run/docker.sock \
            -v /tmp/nexus-deploy-script.sh:/deploy.sh:ro \
            -w /workspace/nexus \
            -i \
            docker:cli \
-            sh -c "
+            sh /deploy.sh < "${ENV_TMPFILE}"
-              set -e
+          
-              trap 'rm -f /tmp/nexus-deploy-env' EXIT
+          rm -f /tmp/nexus-deploy-script.sh
              # Previous inline form of the deploy steps: the body of the sh -c string opened above.
              # Backslash-escaped dollars and quotes are left for the shell inside the container;
              # the unescaped SERVICE_ARG and BUILD_ARGS references are substituted by the calling
              # shell before the container starts.
              cat > /tmp/nexus-deploy-env
              # ── Clean up zombie containers from failed deploys ──
              docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
              docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true
              # ── WAL recovery: reset corrupt WAL that can block postgres startup ──
              # Force-killed postgres containers can leave stale WAL entries that cause
              # 'PANIC: could not locate a valid checkpoint record' on next start.
              # pg_resetwal -f clears the WAL (losing uncommitted tx, which were lost anyway).
              PG_VOL=\$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1)
              if [ -n \"\$PG_VOL\" ]; then
                echo '🩺 Checking postgres WAL integrity...'
                docker run --rm -v \"\$PG_VOL:/var/lib/postgresql/data\" \
                  --entrypoint sh postgres:17-alpine -c '
                    echo "Resetting WAL..."
                    pg_resetwal -f /var/lib/postgresql/data && echo \"✅ WAL reset OK\"
                  ' 2>&1 || echo '⚠️ pg_resetwal failed — postgres may need manual intervention'
              else
                echo '⚠️ Postgres volume not found — will be created fresh'
              fi
              if [ -n '${SERVICE_ARG}' ]; then
                echo '🚀 Deploying service: ${SERVICE_ARG}'
                docker compose --env-file /tmp/nexus-deploy-env build ${BUILD_ARGS} ${SERVICE_ARG}
                docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate ${SERVICE_ARG}
              else
                echo '🚀 Deploying all services'
                docker compose --env-file /tmp/nexus-deploy-env build ${BUILD_ARGS}
                docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate
              fi
              echo '⏳ Waiting for services to become healthy (up to 180s)...'
              for i in \$(seq 1 36); do
                UNHEALTHY=\$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | grep -E 'unhealthy' || true)
                STARTING=\$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | grep -E 'starting' || true)
                if [ -n \"\$UNHEALTHY\" ]; then
                  echo \"  [\$i/36] Containers unhealthy:\"
                  echo \"\$UNHEALTHY\"
                  echo \"Failing fast — unhealthy container detected\"
                  docker compose --env-file /tmp/nexus-deploy-env logs --tail=30
                  exit 1
                elif [ -n \"\$STARTING\" ]; then
                  echo \"  [\$i/36] Still starting...\"
                  sleep 5
                else
                  echo '✅ All containers healthy'
                  docker compose --env-file /tmp/nexus-deploy-env ps -a
                  exit 0
                fi
              done
              echo '❌ Timeout waiting for services (180s)'
              docker compose --env-file /tmp/nexus-deploy-env ps -a
              docker compose --env-file /tmp/nexus-deploy-env logs --tail=20
              exit 1
            " < "${ENV_TMPFILE}"
          echo "✅ Docker compose up completed"