f95463ef50
Root cause: Dual-source architecture for owner password (Gitea secret ENV_OWNER_PASSWORD vs host .env OWNER_PASSWORD) caused drift when the DB was ever re-seeded or the volume recreated. Changes: - Add SeedAudit entity + migration to track one-time seed operations - EnsureDatabaseAsync checks SeedAudit BEFORE seeding — owner is never re-created even if the Users table is wiped - Deploy and rollback workflows now read OWNER_PASSWORD from the host's persistent .env (single source of truth) instead of Gitea secrets - compose.yaml documented: OWNER_PASSWORD only used during initial seed - Cleanup: .gitignore extended for core dumps, changelog/deployment.md updated with 2026-06-20 session notes After this fix the DB is the single source of truth for the owner password after initial seed. The host .env is the single reference for the initial value.
415 lines
19 KiB
YAML
415 lines
19 KiB
YAML
# Display name of this workflow.
name: 'Deploy to Production'

# Title of an individual run; includes the account that triggered it.
run-name: '🚀 Deploy by @${{ gitea.actor }}'
|
|
|
|
# ───────────────────────────────────────────────────────
|
|
# Owner: DevOps (Architekt)
|
|
# CD v3 — 2026-06-13
|
|
#
|
|
# Triggers:
|
|
# 1. AUTOMATIC after successful CI on main (workflow_run)
|
|
# → Uses safe defaults: build cache on, all services, main branch (no version bump).
|
|
# → Commits marked with [skip ci] are filtered at job level
|
|
# (prevents version-bump loops).
|
|
# 2. MANUAL via workflow_dispatch with full parameter control.
|
|
#
|
|
# Concurrency: one deploy at a time.
|
|
# Queued deploys wait — no race conditions with parallel builds.
|
|
#
|
|
# Version Management:
|
|
# The VERSION file in the repo root is the single source of truth.
|
|
# Version bumps happen in the Dev workflow BEFORE merge to main.
|
|
# The deploy workflow only reads, validates, and logs the version.
|
|
# The [skip ci] filter remains as a safety layer for auto-triggers.
|
|
# ───────────────────────────────────────────────────────
|
|
# All runs share one concurrency group, and an in-progress run is never
# cancelled by a newer one.
concurrency:
  group: 'deploy-production'
  cancel-in-progress: false
|
|
|
|
on:
  # ── Auto-Trigger: after successful CI on main ──
  # Fires when the named CI workflow completes for main. The success check
  # and the [skip ci] filter are applied by the deploy job's `if` condition.
  workflow_run:
    workflows:
      - "CI - Build & Test"
    types:
      - completed
    branches:
      - main

  # ── Manual Trigger (full control) ──
  workflow_dispatch:
    inputs:
      # Compose service to build and start; empty means every service.
      service:
        description: "Service to deploy (empty = all)"
        required: false
        default: ""
        type: string
      # When true, the image build runs with --no-cache.
      no_cache:
        description: "Disable Docker build cache"
        required: false
        default: false
        type: boolean
      # Ref handed to the checkout step for manual runs.
      git_ref:
        description: "Git ref to deploy (branch, tag, or commit SHA; default: main)"
        required: false
        default: "main"
        type: string
|
|
|
|
jobs:
  deploy:
    name: 'Deploy Nexus'
    runs-on: ubuntu-latest
    # Manual dispatches always run. Auto-triggered runs proceed only when the
    # CI run concluded successfully and its head commit message does not
    # contain "[skip ci]".
    if: |
      (github.event_name == 'workflow_dispatch') ||
      (github.event_name == 'workflow_run' &&
       github.event.workflow_run.conclusion == 'success' &&
       !contains(github.event.workflow_run.head_commit.message, '[skip ci]'))

    # ── Env for the deploy target path ──
    env:
      # Directory on the host that receives the code and holds the persistent .env.
      DEPLOY_PATH: '/home/projekte_bao/openclaw/data/openclaw/workspace/nexus'
      # Short-lived env file assembled for docker compose; lives outside the workspace.
      ENV_TMPFILE: '/tmp/nexus-deploy-env'
      ENV_POSTGRES_PASSWORD: '${{ secrets.ENV_POSTGRES_PASSWORD }}'
      ENV_JWT_KEY: '${{ secrets.ENV_JWT_KEY }}'
      ENV_OPENCLAW_TOKEN: '${{ secrets.ENV_OPENCLAW_TOKEN }}'
      # OWNER_PASSWORD is read from the host's persistent .env — NOT from a Gitea secret.
      # This ensures the password stays consistent across deploys and the DB is the
      # single source of truth after initial seed (enforced by SeedAudit guard).

    steps:
|
|
# ═══════════════════════════════════════════════════
# Step 1: Checkout
#
# Manual runs check out the requested git_ref.
# Auto-triggered runs check out the exact commit the
# triggering CI run reported (workflow_run.head_sha), so
# a commit pushed to main while CI was running is not
# deployed untested. Falls back to main when neither
# value is available.
# Full history and tags are fetched for `git describe`.
# ═══════════════════════════════════════════════════
- name: Checkout
  uses: actions/checkout@v4
  with:
    ref: ${{ github.event_name == 'workflow_dispatch' && inputs.git_ref || github.event.workflow_run.head_sha || 'main' }}
    fetch-depth: 0
    fetch-tags: true
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 2: Set up Git identity
#
# Written without --global, so the identity applies to
# the checked-out repository only.
# ═══════════════════════════════════════════════════
- name: Configure Git
  run: |
    git config user.name "DevOps"
    git config user.email "devops@noveria.net"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 3: Resolve deploy version
#
# Takes the version from the VERSION file in the repo
# root, rejects anything that is not MAJOR.MINOR.PATCH,
# logs it together with git metadata and publishes it
# as a step output. Nothing is committed or pushed here.
# ═══════════════════════════════════════════════════
- name: Resolve Version
  id: version
  run: |
    set -euo pipefail

    # 1. The VERSION file must be present in the checkout
    [ -f VERSION ] || { echo "❌ VERSION file not found"; exit 1; }

    # 2. Strip all whitespace, then require three dot-separated numbers
    SEMVER_RE='^[0-9]+\.[0-9]+\.[0-9]+$'
    VERSION=$(tr -d '[:space:]' < VERSION)
    if ! echo "$VERSION" | grep -qE "$SEMVER_RE"; then
      echo "❌ Invalid semver in VERSION: '$VERSION'"
      exit 1
    fi

    # 3. Log the version next to the short commit hash and `git describe`
    SHORT_SHA=$(git rev-parse --short HEAD)
    DESCRIBE=$(git describe --always --dirty)

    echo "📦 Deploy version: v${VERSION}"
    echo "🔖 Git ref: ${SHORT_SHA}"
    echo "🏷️ Git describe: ${DESCRIBE}"

    # 4. Publish step outputs (mutated_main is always false: this step
    #    never changes the repository)
    {
      echo "version=${VERSION}"
      echo "mutated_main=false"
    } >> "$GITEA_OUTPUT"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 4: Build .env from secrets + host .env (SAFE)
#
# Secrets are written to /tmp/nexus-deploy-env — NEVER
# to a file inside the workspace that gets synced to
# the host. The temp file is removed again by the
# cleanup step after the compose operations.
#
# OWNER_PASSWORD is read from the host's persistent .env
# to ensure it stays the single source of truth. Other
# secrets (POSTGRES_PASSWORD, JWT_KEY, OPENCLAW_TOKEN)
# come from Gitea secrets.
#
# Fails the job when OWNER_PASSWORD cannot be read from
# the host .env.
# ═══════════════════════════════════════════════════
- name: Prepare .env (secrets + host .env → temp file)
  run: |
    set -euo pipefail

    # Create files private to this user from the first byte on, instead of
    # relying on the chmod that only runs after the secrets are written.
    umask 077

    # Read OWNER_PASSWORD from the host's persistent .env
    HOST_OWNER_PASSWORD=""
    if [ -f "${DEPLOY_PATH}/.env" ]; then
      # cut -f2- keeps any '=' that is part of the value. tail -n 1 reduces a
      # duplicated key to one definition (the last one), so a duplicate can
      # no longer spill extra lines into the generated env file.
      HOST_OWNER_PASSWORD=$(grep '^OWNER_PASSWORD=' "${DEPLOY_PATH}/.env" | cut -d= -f2- | tail -n 1 || true)
    fi
    if [ -z "${HOST_OWNER_PASSWORD}" ]; then
      echo "❌ OWNER_PASSWORD not found in ${DEPLOY_PATH}/.env"
      echo " The host .env is the single source of truth for the owner password."
      echo " Ensure OWNER_PASSWORD is set in the deploy-path .env before deploying."
      exit 1
    fi

    # Start from a fresh file: a leftover from an earlier run (or a symlink
    # at this path) would otherwise keep its old mode or target.
    rm -f "${ENV_TMPFILE}"

    # Unquoted heredoc: the ${...} references are expanded once, here.
    cat > "${ENV_TMPFILE}" <<EOF
    # Nexus Production Environment — auto-generated by CD pipeline
    # Managed via Gitea Secrets + host .env → do NOT edit manually on the host.
    # This file lives in /tmp and is removed after deploy completes.
    POSTGRES_DB=nexus
    POSTGRES_USER=nexus
    POSTGRES_PASSWORD=${ENV_POSTGRES_PASSWORD}
    JWT_KEY=${ENV_JWT_KEY}
    JWT_ISSUER=nexus
    JWT_AUDIENCE=nexus-web
    OWNER_EMAIL=vmbao62@hotmail.de
    OWNER_PASSWORD=${HOST_OWNER_PASSWORD}
    OWNER_DISPLAY_NAME=
    OPENCLAW_BASE_URL=http://host.docker.internal:18789
    OPENCLAW_GATEWAY_TOKEN=${ENV_OPENCLAW_TOKEN}
    OPENCLAW_GATEWAY_PASSWORD=
    EOF

    # Kept as an explicit guarantee in addition to the umask above.
    chmod 600 "${ENV_TMPFILE}"
    echo "✅ .env written to ${ENV_TMPFILE} (mode 600)"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 5: Sync code to host (without .env in workspace)
#
# Copies every top-level entry of the checkout into the
# deploy path through a throwaway Alpine container, then
# hands ownership of the tree to whoever owns the deploy
# directory itself.
#
# .git is skipped. .env is skipped as well, so a .env
# that happens to exist in the checkout can never
# overwrite the host's persistent .env (the source of
# OWNER_PASSWORD).
#
# Files are only added or overwritten; entries deleted
# from the repository remain on the host.
# ═══════════════════════════════════════════════════
- name: Sync code to host
  run: |
    set -euo pipefail

    # The inner script is single-quoted so that $(...) and "$DEST_OWNER"
    # are evaluated inside the container, not by the runner shell.
    docker run --rm \
      -v "${{ gitea.workspace }}:/src:ro" \
      -v "${DEPLOY_PATH}:/dest" \
      alpine:latest \
      sh -c '
        cd /src &&
        find . -mindepth 1 -maxdepth 1 \
          ! -name .git \
          ! -name .env \
          -exec cp -r {} /dest/ \; &&
        DEST_OWNER=$(stat -c "%u:%g" /dest) &&
        chown -R "$DEST_OWNER" /dest
      '

    echo "✅ Code synced to ${DEPLOY_PATH}"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 6: Build & Deploy
#
# Generates a small deploy script and runs it inside a
# docker:cli container that has the deploy path and the
# host Docker socket mounted.
#
# The temp .env is streamed to the container over stdin
# and written to /tmp inside the container, where the
# script's EXIT trap deletes it again.
# It is NEVER written into the workspace directory.
# ═══════════════════════════════════════════════════
- name: Build & Deploy
  env:
    # workflow_dispatch inputs are handed over as environment variables
    # instead of being expanded into the script text, so a crafted input
    # value cannot be interpreted as shell syntax.
    # Auto-deploy: always use cache. Manual: respect no_cache input.
    REQUESTED_NO_CACHE: ${{ github.event_name == 'workflow_dispatch' && inputs.no_cache || false }}
    # Empty means "all services".
    REQUESTED_SERVICE: ${{ github.event_name == 'workflow_dispatch' && inputs.service || '' }}
  run: |
    set -euo pipefail

    DEPLOY_SCRIPT=/tmp/nexus-deploy-script.sh
    # Remove the generated script on every exit path, including failures.
    trap 'rm -f "$DEPLOY_SCRIPT"' EXIT

    BUILD_ARGS=""
    if [ "${REQUESTED_NO_CACHE:-false}" = "true" ]; then
      BUILD_ARGS="--no-cache"
    fi

    # Write the deploy script to a file to avoid nested quoting issues.
    # The heredoc delimiter is quoted: nothing below is expanded by this
    # shell; it all runs inside the container.
    cat > "$DEPLOY_SCRIPT" << 'DEPLOYSCRIPT'
    #!/bin/sh
    set -e
    trap 'rm -f /tmp/nexus-deploy-env' EXIT
    # The env file content arrives on stdin (see the docker run below).
    cat > /tmp/nexus-deploy-env

    # ── Clean up zombie containers ──
    # NOTE(review): this takes the whole compose project down even when only
    # a single service is being deployed — confirm that is intended.
    docker compose --env-file /tmp/nexus-deploy-env down --remove-orphans 2>/dev/null || true
    docker rm -f nexus-postgres-1 nexus-api-1 nexus-web-1 2>/dev/null || true

    # ── WAL recovery ──
    # NOTE(review): pg_resetwal -f is invoked on every deploy without any
    # prior integrity check. PostgreSQL documents it as a last-resort tool
    # that can lose data. It also refuses to run as root, so if this
    # container runs as root the call presumably always ends in the
    # "failed" branch below — verify, then gate or remove this block.
    PG_VOL=$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1)
    if [ -n "$PG_VOL" ]; then
      echo "Checking postgres WAL integrity..."
      docker run --rm -v "$PG_VOL:/var/lib/postgresql/data" \
        --entrypoint sh postgres:17-alpine -c "
          echo 'Resetting WAL...'
          pg_resetwal -f /var/lib/postgresql/data && echo 'WAL reset OK'
        " 2>&1 || echo 'pg_resetwal failed (may be benign)'
    else
      echo 'Postgres volume not found - will be created fresh'
    fi

    BUILD_ARGS="${DEPLOY_BUILD_ARGS:-}"
    SERVICE="${DEPLOY_SERVICE:-}"

    # $BUILD_ARGS and $SERVICE are left unquoted on purpose: an empty value
    # must vanish from the command line instead of becoming an empty argument.
    if [ -n "$SERVICE" ]; then
      echo "Deploying service: $SERVICE"
      docker compose --env-file /tmp/nexus-deploy-env build $BUILD_ARGS $SERVICE
      docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate $SERVICE
    else
      echo 'Deploying all services'
      docker compose --env-file /tmp/nexus-deploy-env build $BUILD_ARGS
      docker compose --env-file /tmp/nexus-deploy-env up -d --force-recreate
    fi

    # Poll the container table: 36 rounds with a 5s pause = 180s at most.
    echo 'Waiting for services to become healthy (up to 180s)...'
    for i in $(seq 1 36); do
      # tail -n +2 drops the table header.
      STATUS=$(docker compose --env-file /tmp/nexus-deploy-env ps -a 2>/dev/null | tail -n +2)
      if echo "$STATUS" | grep -q 'unhealthy'; then
        echo " [$i/36] Unhealthy containers - failing fast"
        docker compose --env-file /tmp/nexus-deploy-env ps -a
        docker compose --env-file /tmp/nexus-deploy-env logs --tail=30
        exit 1
      elif echo "$STATUS" | grep -Eiq 'exited \([1-9]'; then
        # A container that stopped with a non-zero exit code is neither
        # "unhealthy" nor "starting"; without this branch it would be
        # reported as healthy below.
        echo " [$i/36] Container exited with an error - failing fast"
        docker compose --env-file /tmp/nexus-deploy-env ps -a
        docker compose --env-file /tmp/nexus-deploy-env logs --tail=30
        exit 1
      elif echo "$STATUS" | grep -q 'starting'; then
        # Also matches "Restarting".
        echo " [$i/36] Still starting..."
        sleep 5
      else
        echo 'All containers healthy'
        docker compose --env-file /tmp/nexus-deploy-env ps -a
        exit 0
      fi
    done
    echo 'Timeout waiting for services'
    docker compose --env-file /tmp/nexus-deploy-env ps -a
    docker compose --env-file /tmp/nexus-deploy-env logs --tail=20
    exit 1
    DEPLOYSCRIPT

    # -i keeps stdin open so the script can read the env file from it.
    docker run --rm \
      -e "DEPLOY_BUILD_ARGS=${BUILD_ARGS}" \
      -e "DEPLOY_SERVICE=${REQUESTED_SERVICE:-}" \
      -v "${DEPLOY_PATH}:/workspace/nexus" \
      -v /var/run/docker.sock:/var/run/docker.sock \
      -v "${DEPLOY_SCRIPT}:/deploy.sh:ro" \
      -w /workspace/nexus \
      -i \
      docker:cli \
      sh /deploy.sh < "${ENV_TMPFILE}"

    echo "✅ Docker compose up completed"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 7: Clean up temp .env
#
# Runs regardless of the outcome of earlier steps.
# ═══════════════════════════════════════════════════
- name: Clean up temp .env
  if: always()
  run: |
    # Nothing to do if the file was never created or is already gone.
    [ -f "${ENV_TMPFILE}" ] || exit 0

    # Try shred -u first; fall back to a plain delete when shred fails or
    # is not available.
    if ! shred -u "${ENV_TMPFILE}" 2>/dev/null; then
      rm -f "${ENV_TMPFILE}"
    fi
    echo "🧹 Temp .env removed"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 8: Health Check (increasing backoff)
#
# Up to 6 requests against the public /health endpoint.
# The pause between attempts grows by the attempt
# number: 1s, 2s, 4s, 7s, 11s (capped at 15s).
# Succeeds on the first passing request; fails the job
# right after the last attempt.
# ═══════════════════════════════════════════════════
- name: Health Check
  run: |
    echo "🏥 Health check..."
    RETRY=0
    MAX=6
    WAIT=1
    while [ "$RETRY" -lt "$MAX" ]; do
      RETRY=$((RETRY + 1))
      # -f makes curl return non-zero on HTTP error statuses.
      if curl -sf --max-time 10 https://nexus.noveria.net/health; then
        echo ""
        echo "✅ Health check passed (attempt $RETRY/$MAX)"
        exit 0
      fi

      # Sleeping after the final attempt would only delay the failure report.
      if [ "$RETRY" -ge "$MAX" ]; then
        echo "⏳ Attempt $RETRY/$MAX failed"
        break
      fi

      echo "⏳ Attempt $RETRY/$MAX failed, waiting ${WAIT}s..."
      sleep "$WAIT"

      # Next delay = current delay + attempt number, capped at 15s.
      NEXT=$((WAIT + RETRY))
      if [ "$NEXT" -le 15 ]; then
        WAIT=$NEXT
      else
        WAIT=15
      fi
    done
    echo "❌ Health check failed after $MAX attempts"
    exit 1
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 9: Smoke Test
#
# Requests a few public URLs and compares the HTTP
# status of each with the expected one. Any mismatch
# fails the job.
# ═══════════════════════════════════════════════════
- name: Smoke Test
  run: |
    echo "🔍 Smoke test..."
    BASE="https://nexus.noveria.net"
    PASS=0
    FAIL=0

    # check PATH LABEL [EXPECTED_STATUS]
    # Requests BASE+PATH, prints one result line and increments PASS or
    # FAIL. EXPECTED_STATUS defaults to 200.
    check() {
      local route="$1" title="$2" want="${3:-200}"
      local got
      # Body is discarded; only the status code is captured.
      got=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "${BASE}${route}")
      printf " %-25s HTTP %s" "${title}:" "${got}"
      if [ "$got" != "$want" ]; then
        echo " ❌ (expected $want)"
        FAIL=$((FAIL + 1))
        return 0
      fi
      echo " ✅"
      PASS=$((PASS + 1))
    }

    check "/dashboard" "Dashboard" 200
    check "/health" "Health API" 200
    # An unauthenticated request to the API is expected to be rejected.
    check "/api/v1/operations/snapshot" "Operations API (auth)" 401

    echo ""
    echo "Results: $PASS passed, $FAIL failed"
    [ "$FAIL" -eq 0 ] || { echo "❌ Smoke test failed!"; exit 1; }
    echo "✅ Smoke test passed — v${{ steps.version.outputs.version }} is live"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 10: Deployment Summary
#
# Prints version, ref, service, trigger, actor and job
# status. Runs regardless of the outcome of earlier
# steps.
# ═══════════════════════════════════════════════════
- name: Deployment Summary
  if: always()
  env:
    # Values that come from workflow_dispatch inputs are passed through the
    # environment instead of being expanded into the script text, so
    # characters such as $( ) or quotes in an input are printed, not run.
    SUMMARY_GIT_REF: ${{ github.event_name == 'workflow_dispatch' && inputs.git_ref || 'main' }}
    SUMMARY_SERVICE: ${{ github.event_name == 'workflow_dispatch' && inputs.service || 'all' }}
  run: |
    TRIGGER="${{ github.event_name == 'workflow_run' && 'Auto (CI success)' || 'Manual (workflow_dispatch)' }}"
    echo ""
    echo "═══════════════════════════════════════"
    echo " 📦 Deploy Summary"
    echo "═══════════════════════════════════════"
    echo " Version: v${{ steps.version.outputs.version }}"
    echo " Git ref: ${SUMMARY_GIT_REF}"
    echo " Service: ${SUMMARY_SERVICE}"
    echo " Trigger: ${TRIGGER}"
    echo " Actor: @${{ gitea.actor }}"
    echo " Status: ${{ job.status }}"
    echo "═══════════════════════════════════════"
|
|
|
|
# ═══════════════════════════════════════════════════
# Step 11: Failure → Reviewer Handoff
#
# Runs only when an earlier step failed. Prints a
# ready-to-copy notice with the version and a link to
# this run: DevOps (Architekt) analyses the log, the
# Reviewer (Code-Fixer) fixes the problem, and DevOps
# verifies with a new deploy. Also points to the
# rollback workflow.
# ═══════════════════════════════════════════════════
- name: 🔴 Failure — Reviewer Handoff
  if: failure()
  run: |
    # One heredoc instead of a series of echo calls; the leading empty
    # line is part of the output.
    cat <<EOF

    ┌─────────────────────────────────────────────────────────────┐
    │ 🔴 DEPLOY FAILED — Reviewer muss fixen │
    ├─────────────────────────────────────────────────────────────┤
    │ │
    │ Version: v${{ steps.version.outputs.version }}
    │ Job: ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_id }}
    │ │
    │ → DevOps (Architekt) analysiert den Fehler │
    │ → Reviewer (Code-Fixer) behebt das Problem │
    │ → DevOps verifiziert mit neuem Deploy │
    │ │
    │ Rollback: Trigger 'Rollback to Previous Version' │
    │ workflow manuell in Gitea Actions. │
    │ │
    └─────────────────────────────────────────────────────────────┘
    EOF
|