From 06eac66baa198b918f795e5d97c682399a9ab80a Mon Sep 17 00:00:00 2001 From: DevOps Date: Sat, 20 Jun 2026 18:56:11 +0200 Subject: [PATCH] fix: postgres WAL corruption recovery + memory bump + researcher/executor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Postgres memory: 256M→384M limits, 64M→96M reservations - Added pg_resetwal -f pre-deploy step to recover from corrupt WAL ('PANIC: could not locate a valid checkpoint record' caused by force-killed postgres during --force-recreate) - Added data-checksums initdb arg for future corruption detection (takes effect only when the data directory is first initialised; existing volumes are unchanged) - api→postgres and web→api depends_on: service_healthy→service_started - Deploy wait loop: fail fast on unhealthy, wait on starting (180s) - Added researcher/executor to ValidAssignees and frontend dropdowns --- .gitea/workflows/deploy.yaml | 14 ++++++++++++++ backend/Services/TaskService.cs | 2 +- compose.yaml | 5 +++-- frontend/src/views/TaskBoardView.vue | 6 ++++++ phases/changelog.md | 6 ++++++ 5 files changed, 30 insertions(+), 3 deletions(-) diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 67840a4..806a1e8 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -211,6 +211,20 @@ jobs: set -e trap 'rm -f /tmp/nexus-deploy-env' EXIT cat > /tmp/nexus-deploy-env + + # ── WAL recovery: reset corrupt WAL that can block postgres startup ── + # Force-killed postgres containers can leave stale WAL entries that cause + # 'PANIC: could not locate a valid checkpoint record' on next start. + # pg_resetwal -f discards the WAL; it is a last resort that can lose committed work or leave data inconsistent, so verify the data afterwards. + PG_VOL=\$(docker volume ls -q --filter name=nexus-postgres 2>/dev/null | head -1) + if [ -n \"\$PG_VOL\" ]; then + echo '🩺 Checking postgres WAL integrity...' 
+ docker run --rm -v \"\$PG_VOL:/var/lib/postgresql/data\" \ + --entrypoint sh postgres:17-alpine -c ' + pg_resetwal -f /var/lib/postgresql/data 2>/dev/null && echo \"✅ WAL reset OK\" || echo \"WAL reset not needed / benign error\" + ' 2>/dev/null || echo 'WAL check skipped' + fi + if [ -n '${SERVICE_ARG}' ]; then echo '🚀 Deploying service: ${SERVICE_ARG}' docker compose --env-file /tmp/nexus-deploy-env build ${BUILD_ARGS} ${SERVICE_ARG} diff --git a/backend/Services/TaskService.cs b/backend/Services/TaskService.cs index 0ce6f6f..ef9ea8d 100644 --- a/backend/Services/TaskService.cs +++ b/backend/Services/TaskService.cs @@ -12,7 +12,7 @@ public sealed class TaskService( IHttpContextAccessor httpContextAccessor) : ITaskService { private static readonly HashSet ValidAssignees = - ["bao", "iris", "programmer", "reviewer", "architekt"]; + ["bao", "iris", "programmer", "reviewer", "architekt", "researcher", "executor"]; public async Task> GetAllAsync(CancellationToken ct = default) => await taskRepo.GetAllAsync(ct); diff --git a/compose.yaml b/compose.yaml index 0218a54..6758af8 100644 --- a/compose.yaml +++ b/compose.yaml @@ -7,10 +7,11 @@ services: deploy: resources: limits: - memory: 256M + memory: 384M reservations: - memory: 64M + memory: 96M environment: + POSTGRES_INITDB_ARGS: --data-checksums POSTGRES_DB: ${POSTGRES_DB:-nexus} POSTGRES_USER: ${POSTGRES_USER:-nexus} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?Set POSTGRES_PASSWORD in .env} diff --git a/frontend/src/views/TaskBoardView.vue b/frontend/src/views/TaskBoardView.vue index 9f54489..f343e74 100644 --- a/frontend/src/views/TaskBoardView.vue +++ b/frontend/src/views/TaskBoardView.vue @@ -226,6 +226,8 @@ function expectedFromLabel(expected: string | null | undefined): string { 'programmer': '🛠 Programmer', 'reviewer': '🔎 Reviewer', 'architekt': '🏛 Architekt', + 'researcher': '🔬 Researcher', + 'executor': '⚡ Executor', } return map[expected.toLowerCase()] ?? 
expected } @@ -778,6 +780,8 @@ onUnmounted(() => { + + @@ -896,6 +900,8 @@ onUnmounted(() => { + +