ops: enhanced deploy verification with web-recovery + incident docs
This commit is contained in:
+35
-1
@@ -25,7 +25,41 @@ docker compose ps
|
|||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "[4/4] Verifikation..."
|
echo "[4/4] Verifikation..."
|
||||||
curl -fsS http://localhost:18880/health && echo " ✅ Health-Check bestanden"
|
#######################################
# Probe a local endpoint and print its HTTP status code.
# Globals:   CHECK_TIMEOUT (read, optional) - per-request limit in seconds,
#            defaults to 10
# Arguments: $1 - request path, appended to http://localhost:18880
# Outputs:   the HTTP status code on stdout; curl reports "000" when no
#            response was received (connection refused, timeout, ...)
# Returns:   0 regardless of curl's exit status, so a caller running under
#            `set -e` can inspect the printed code instead of aborting on a
#            transport error
#######################################
check_code() {
  local path="$1"
  # -s: no progress meter; -o /dev/null: discard the body; -w: print only
  # the status code. --max-time keeps a hung endpoint from blocking forever.
  curl -s -o /dev/null -w "%{http_code}" \
    --max-time "${CHECK_TIMEOUT:-10}" \
    "http://localhost:18880${path}" || true
}
|
||||||
|
|
||||||
|
# Initial probes of the three endpoints that the verification relies on.
HEALTH_CODE=$(check_code /health)
DASHBOARD_CODE=$(check_code /dashboard)
OPS_CODE=$(check_code /api/v1/operations/snapshot)

# Recovery path: /health answers 200 but /dashboard does not. If the `web`
# compose service exists and its container is still in state "created"
# (i.e. it was never started), start it explicitly and re-probe.
if [ "$HEALTH_CODE" = "200" ] && [ "$DASHBOARD_CODE" != "200" ]; then
  # Best effort: if the lookup fails, WEB_CID stays empty and recovery is skipped.
  WEB_CID="$(docker compose ps -q web || true)"
  if [ -n "$WEB_CID" ]; then
    # Best effort as well; an empty state simply does not match "created".
    WEB_STATE="$(docker inspect -f '{{.State.Status}}' "$WEB_CID" 2>/dev/null || true)"
    if [ "$WEB_STATE" = "created" ]; then
      echo " ℹ️ API healthy, aber web noch im Status 'created' — starte web nach"
      docker compose up -d web
      # Poll instead of a single fixed sleep: a freshly started container may
      # need more than two seconds before it serves /dashboard. The first
      # probe still happens after 2s; give up after 10 attempts (~20s).
      for _attempt in 1 2 3 4 5 6 7 8 9 10; do
        sleep 2
        DASHBOARD_CODE=$(check_code /dashboard)
        if [ "$DASHBOARD_CODE" = "200" ]; then
          break
        fi
      done
      OPS_CODE=$(check_code /api/v1/operations/snapshot)
    fi
  fi
fi
|
||||||
|
|
||||||
|
# Report the status code observed for each probed endpoint.
printf '%s\n' \
  " /health -> ${HEALTH_CODE}" \
  " /dashboard -> ${DASHBOARD_CODE}" \
  " /api/v1/operations/snapshot -> ${OPS_CODE}"

# Gate: the deploy only counts as verified when /health and /dashboard
# answer 200 and the operations API answers 401 (unauthenticated request
# rejected). Any other combination fails the script with exit status 1.
case "${HEALTH_CODE}:${DASHBOARD_CODE}:${OPS_CODE}" in
  200:200:401)
    printf '%s\n' \
      " ✅ Health-Check bestanden" \
      " ✅ Dashboard erreichbar" \
      " ✅ Operations API fordert Auth an"
    ;;
  *)
    printf '%s\n' " ❌ Verifikation fehlgeschlagen"
    exit 1
    ;;
esac
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== Deployment abgeschlossen ==="
|
echo "=== Deployment abgeschlossen ==="
|
||||||
|
|||||||
@@ -185,6 +185,16 @@ Stelle sicher, dass `.env` existiert und alle `***`-Platzhalter ersetzt sind.
|
|||||||
- Let's Encrypt TLS-Zertifikat aktiv
|
- Let's Encrypt TLS-Zertifikat aktiv
|
||||||
- Nginx-Proxy → 127.0.0.1:18880
|
- Nginx-Proxy → 127.0.0.1:18880
|
||||||
|
|
||||||
|
## Incident-Hinweis (2026-06-14)
|
||||||
|
|
||||||
|
- Verifizierter Ausfallpfad: `api` konnte wegen DB-Passwort-Mismatch nicht healthy werden; dadurch blieb `web` per `depends_on: service_healthy` im Status `Created`.
|
||||||
|
- Nach einem isolierten API-Fix startet `web` nicht automatisch nach. Sicherer Minimalpfad:
|
||||||
|
1. `docker compose ps`
|
||||||
|
2. `curl -s -o /dev/null -w '%{http_code}\n' http://127.0.0.1:18880/health` (gibt nur den HTTP-Statuscode aus)
|
||||||
|
3. Falls `/health` mit `200` antwortet, `/dashboard` aber noch nicht `200` liefert und `web` auf `Created` steht: `docker compose up -d web`
|
||||||
|
4. Danach extern `/dashboard`, `/health` und `/api/v1/operations/snapshot` erneut prüfen
|
||||||
|
- Der manuelle Helper `ops/deploy.sh` verifiziert deshalb jetzt nicht mehr nur `/health`, sondern auch `/dashboard` und den Auth-Schutz der Operations-API.
|
||||||
|
|
||||||
## Offene Arbeit
|
## Offene Arbeit
|
||||||
|
|
||||||
- [ ] Docker-Socket-Risiko im CD-Workflow final adressieren (kommt später)
|
- [ ] Docker-Socket-Risiko im CD-Workflow final adressieren (kommt später)
|
||||||
|
|||||||
Reference in New Issue
Block a user