Fix purge 500 when SQLite database is locked (Spec 017).
Enable WAL/busy_timeout, retry writes, reject duplicate running jobs with HTTP 409, use bcrypt directly instead of broken passlib 1.7.4 + bcrypt 4.x, and improve UI errors.
This commit is contained in:
parent
06d0dcc977
commit
0ee4845818
5 changed files with 229 additions and 61 deletions
|
|
@ -13,7 +13,7 @@ from typing import Any
|
||||||
|
|
||||||
from fastapi import Depends, Header, HTTPException, Request
|
from fastapi import Depends, Header, HTTPException, Request
|
||||||
from jose import JWTError, jwt
|
from jose import JWTError, jwt
|
||||||
from passlib.context import CryptContext
|
import bcrypt
|
||||||
|
|
||||||
from app.totp_util import verify_code as verify_totp_code
|
from app.totp_util import verify_code as verify_totp_code
|
||||||
|
|
||||||
|
|
@ -26,8 +26,6 @@ DESK_BOOTSTRAP_PASSWORD = os.getenv("DESK_BOOTSTRAP_PASSWORD", "805353")
|
||||||
AUTH_LOGIN_RATE_LIMIT = int(os.getenv("AUTH_LOGIN_RATE_LIMIT", "5"))
|
AUTH_LOGIN_RATE_LIMIT = int(os.getenv("AUTH_LOGIN_RATE_LIMIT", "5"))
|
||||||
OPS_INTERNAL_TOKEN = os.getenv("OPS_INTERNAL_TOKEN", "")
|
OPS_INTERNAL_TOKEN = os.getenv("OPS_INTERNAL_TOKEN", "")
|
||||||
|
|
||||||
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
|
||||||
|
|
||||||
_login_attempts: dict[str, list[float]] = {}
|
_login_attempts: dict[str, list[float]] = {}
|
||||||
_mfa_pending: dict[str, tuple[str, float]] = {}
|
_mfa_pending: dict[str, tuple[str, float]] = {}
|
||||||
MFA_TOKEN_TTL_SEC = 300
|
MFA_TOKEN_TTL_SEC = 300
|
||||||
|
|
@ -54,17 +52,24 @@ class DeskUser:
|
||||||
|
|
||||||
def db() -> sqlite3.Connection:
|
def db() -> sqlite3.Connection:
|
||||||
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||||
conn = sqlite3.connect(DB_PATH)
|
conn = sqlite3.connect(DB_PATH, timeout=30.0)
|
||||||
conn.row_factory = sqlite3.Row
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("PRAGMA journal_mode=WAL")
|
||||||
|
conn.execute("PRAGMA busy_timeout=30000")
|
||||||
return conn
|
return conn
|
||||||
|
|
||||||
|
|
||||||
def hash_password(password: str) -> str:
|
def hash_password(password: str) -> str:
|
||||||
return pwd_context.hash(password)
|
return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
def verify_password(plain: str, hashed: str) -> bool:
|
def verify_password(plain: str, hashed: str) -> bool:
|
||||||
return pwd_context.verify(plain, hashed)
|
if not plain or not hashed:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
return bcrypt.checkpw(plain.encode("utf-8"), hashed.encode("utf-8"))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def init_auth_schema(conn: sqlite3.Connection) -> None:
|
def init_auth_schema(conn: sqlite3.Connection) -> None:
|
||||||
|
|
|
||||||
|
|
@ -133,8 +133,10 @@ TICKET_COLUMNS = "id,tenant_id,subject,status,payload,created_at,assigned_to,ass
|
||||||
|
|
||||||
def db():
|
def db():
|
||||||
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||||
conn = sqlite3.connect(DB_PATH)
|
conn = sqlite3.connect(DB_PATH, timeout=30.0)
|
||||||
conn.row_factory = sqlite3.Row
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("PRAGMA journal_mode=WAL")
|
||||||
|
conn.execute("PRAGMA busy_timeout=30000")
|
||||||
return conn
|
return conn
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -172,6 +174,11 @@ def init_db():
|
||||||
from app import billing_store
|
from app import billing_store
|
||||||
migration_store.init_schema(conn)
|
migration_store.init_schema(conn)
|
||||||
billing_store.init_schema(conn)
|
billing_store.init_schema(conn)
|
||||||
|
from app.vm112_purge_jobs import init_purge_jobs_schema
|
||||||
|
|
||||||
|
init_purge_jobs_schema(conn)
|
||||||
|
conn.execute("PRAGMA journal_mode=WAL")
|
||||||
|
conn.execute("PRAGMA busy_timeout=30000")
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
@ -9,7 +11,13 @@ from pydantic import BaseModel, Field
|
||||||
from app import auth, vm112_domains
|
from app import auth, vm112_domains
|
||||||
from app.permissions import can_manage_vm112_domains
|
from app.permissions import can_manage_vm112_domains
|
||||||
from app.vm112_purge_stream import purge_sse_generator
|
from app.vm112_purge_stream import purge_sse_generator
|
||||||
from app.vm112_purge_jobs import get_job_public, list_jobs, recover_job, start_job
|
from app.vm112_purge_jobs import (
|
||||||
|
PurgeJobConflictError,
|
||||||
|
get_job_public,
|
||||||
|
list_jobs,
|
||||||
|
recover_job,
|
||||||
|
start_job,
|
||||||
|
)
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/v1/vm112", tags=["vm112-domains"])
|
router = APIRouter(prefix="/api/v1/vm112", tags=["vm112-domains"])
|
||||||
|
|
||||||
|
|
@ -116,7 +124,17 @@ def start_purge_job(
|
||||||
):
|
):
|
||||||
"""Inicia purge em background; consultar GET /purge/jobs/{id} (recomendado via Traefik)."""
|
"""Inicia purge em background; consultar GET /purge/jobs/{id} (recomendado via Traefik)."""
|
||||||
domain = _validate_purge_request(domain, body)
|
domain = _validate_purge_request(domain, body)
|
||||||
job_id = start_job(domain, body.root_password, user.username)
|
try:
|
||||||
|
job_id = start_job(domain, body.root_password, user.username)
|
||||||
|
except PurgeJobConflictError as exc:
|
||||||
|
raise HTTPException(409, str(exc)) from exc
|
||||||
|
except sqlite3.OperationalError as exc:
|
||||||
|
if "locked" in str(exc).lower():
|
||||||
|
raise HTTPException(
|
||||||
|
503,
|
||||||
|
"Base de dados ocupada — aguarde o purge em curso ou tente novamente em alguns segundos",
|
||||||
|
) from exc
|
||||||
|
raise HTTPException(500, f"Erro SQLite ao iniciar purge: {exc}") from exc
|
||||||
return {"ok": True, "job_id": job_id, "domain": domain, "status": "running"}
|
return {"ok": True, "job_id": job_id, "domain": domain, "status": "running"}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,9 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import sqlite3
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
@ -12,6 +14,11 @@ from typing import Any, Callable
|
||||||
from app import auth, vm112_domains
|
from app import auth, vm112_domains
|
||||||
|
|
||||||
_lock = threading.Lock()
|
_lock = threading.Lock()
|
||||||
|
_schema_ready = False
|
||||||
|
|
||||||
|
|
||||||
|
class PurgeJobConflictError(Exception):
|
||||||
|
"""Já existe purge queued/running para o domínio."""
|
||||||
|
|
||||||
|
|
||||||
def _now() -> str:
|
def _now() -> str:
|
||||||
|
|
@ -36,17 +43,42 @@ def init_purge_jobs_schema(conn) -> None:
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_vm112_purge_jobs_domain_status
|
||||||
|
ON vm112_purge_jobs(domain, status)
|
||||||
|
"""
|
||||||
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
def _ensure_schema() -> None:
|
def _ensure_schema() -> None:
|
||||||
|
global _schema_ready
|
||||||
|
if _schema_ready:
|
||||||
|
return
|
||||||
conn = auth.db()
|
conn = auth.db()
|
||||||
try:
|
try:
|
||||||
init_purge_jobs_schema(conn)
|
init_purge_jobs_schema(conn)
|
||||||
|
_schema_ready = True
|
||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _sqlite_retry(fn: Callable[[], Any], *, attempts: int = 8) -> Any:
|
||||||
|
last_err: Exception | None = None
|
||||||
|
for attempt in range(attempts):
|
||||||
|
try:
|
||||||
|
return fn()
|
||||||
|
except sqlite3.OperationalError as exc:
|
||||||
|
last_err = exc
|
||||||
|
if "locked" not in str(exc).lower() or attempt >= attempts - 1:
|
||||||
|
raise
|
||||||
|
time.sleep(min(0.05 * (2**attempt), 1.0))
|
||||||
|
if last_err:
|
||||||
|
raise last_err
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _row_to_job(row) -> dict[str, Any]:
|
def _row_to_job(row) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"id": row["id"],
|
"id": row["id"],
|
||||||
|
|
@ -66,64 +98,124 @@ def _row_to_job(row) -> dict[str, Any]:
|
||||||
|
|
||||||
def _persist_job(job: dict[str, Any]) -> None:
|
def _persist_job(job: dict[str, Any]) -> None:
|
||||||
_ensure_schema()
|
_ensure_schema()
|
||||||
conn = auth.db()
|
|
||||||
try:
|
def _write() -> None:
|
||||||
job["updated_at"] = _now()
|
conn = auth.db()
|
||||||
conn.execute(
|
try:
|
||||||
"""
|
job["updated_at"] = _now()
|
||||||
INSERT INTO vm112_purge_jobs (
|
conn.execute(
|
||||||
id, domain, status, timeline_json, elapsed_vm112,
|
"""
|
||||||
desk_json, vm112_json, error, by_user, created_at, updated_at
|
INSERT INTO vm112_purge_jobs (
|
||||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
id, domain, status, timeline_json, elapsed_vm112,
|
||||||
ON CONFLICT(id) DO UPDATE SET
|
desk_json, vm112_json, error, by_user, created_at, updated_at
|
||||||
status = excluded.status,
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
timeline_json = excluded.timeline_json,
|
ON CONFLICT(id) DO UPDATE SET
|
||||||
elapsed_vm112 = excluded.elapsed_vm112,
|
status = excluded.status,
|
||||||
desk_json = excluded.desk_json,
|
timeline_json = excluded.timeline_json,
|
||||||
vm112_json = excluded.vm112_json,
|
elapsed_vm112 = excluded.elapsed_vm112,
|
||||||
error = excluded.error,
|
desk_json = excluded.desk_json,
|
||||||
by_user = excluded.by_user,
|
vm112_json = excluded.vm112_json,
|
||||||
updated_at = excluded.updated_at
|
error = excluded.error,
|
||||||
""",
|
by_user = excluded.by_user,
|
||||||
(
|
updated_at = excluded.updated_at
|
||||||
job["id"],
|
""",
|
||||||
job["domain"],
|
(
|
||||||
job["status"],
|
job["id"],
|
||||||
json.dumps(job.get("timeline") or [], ensure_ascii=False),
|
job["domain"],
|
||||||
int(job.get("elapsed_vm112") or 0),
|
job["status"],
|
||||||
json.dumps(job.get("desk") or {}, ensure_ascii=False),
|
json.dumps(job.get("timeline") or [], ensure_ascii=False),
|
||||||
json.dumps(job.get("vm112") or {}, ensure_ascii=False),
|
int(job.get("elapsed_vm112") or 0),
|
||||||
job.get("error"),
|
json.dumps(job.get("desk") or {}, ensure_ascii=False),
|
||||||
job.get("by"),
|
json.dumps(job.get("vm112") or {}, ensure_ascii=False),
|
||||||
job.get("created_at") or _now(),
|
job.get("error"),
|
||||||
job["updated_at"],
|
job.get("by"),
|
||||||
),
|
job.get("created_at") or _now(),
|
||||||
)
|
job["updated_at"],
|
||||||
conn.commit()
|
),
|
||||||
finally:
|
)
|
||||||
conn.close()
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
_sqlite_retry(_write)
|
||||||
|
|
||||||
|
|
||||||
def _load_job(job_id: str) -> dict[str, Any] | None:
|
def _load_job(job_id: str) -> dict[str, Any] | None:
|
||||||
_ensure_schema()
|
_ensure_schema()
|
||||||
conn = auth.db()
|
|
||||||
try:
|
def _read() -> dict[str, Any] | None:
|
||||||
row = conn.execute(
|
conn = auth.db()
|
||||||
"SELECT * FROM vm112_purge_jobs WHERE id = ?", (job_id,)
|
try:
|
||||||
).fetchone()
|
row = conn.execute(
|
||||||
return _row_to_job(row) if row else None
|
"SELECT * FROM vm112_purge_jobs WHERE id = ?", (job_id,)
|
||||||
finally:
|
).fetchone()
|
||||||
conn.close()
|
return _row_to_job(row) if row else None
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
return _sqlite_retry(_read)
|
||||||
|
|
||||||
|
|
||||||
|
def _find_active_job_locked(conn: sqlite3.Connection, domain: str) -> sqlite3.Row | None:
|
||||||
|
return conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT id, domain, status, created_at FROM vm112_purge_jobs
|
||||||
|
WHERE domain = ? AND status IN ('queued', 'running')
|
||||||
|
ORDER BY created_at DESC LIMIT 1
|
||||||
|
""",
|
||||||
|
(domain.lower().strip(),),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
|
||||||
def _mutate_job(job_id: str, fn: Callable[[dict[str, Any]], None]) -> dict[str, Any] | None:
|
def _mutate_job(job_id: str, fn: Callable[[dict[str, Any]], None]) -> dict[str, Any] | None:
|
||||||
with _lock:
|
with _lock:
|
||||||
job = _load_job(job_id)
|
def _mutate() -> dict[str, Any] | None:
|
||||||
if not job:
|
conn = auth.db()
|
||||||
return None
|
try:
|
||||||
fn(job)
|
row = conn.execute(
|
||||||
_persist_job(job)
|
"SELECT * FROM vm112_purge_jobs WHERE id = ?", (job_id,)
|
||||||
return dict(job)
|
).fetchone()
|
||||||
|
if not row:
|
||||||
|
return None
|
||||||
|
job = _row_to_job(row)
|
||||||
|
fn(job)
|
||||||
|
job["updated_at"] = _now()
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
INSERT INTO vm112_purge_jobs (
|
||||||
|
id, domain, status, timeline_json, elapsed_vm112,
|
||||||
|
desk_json, vm112_json, error, by_user, created_at, updated_at
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(id) DO UPDATE SET
|
||||||
|
status = excluded.status,
|
||||||
|
timeline_json = excluded.timeline_json,
|
||||||
|
elapsed_vm112 = excluded.elapsed_vm112,
|
||||||
|
desk_json = excluded.desk_json,
|
||||||
|
vm112_json = excluded.vm112_json,
|
||||||
|
error = excluded.error,
|
||||||
|
by_user = excluded.by_user,
|
||||||
|
updated_at = excluded.updated_at
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
job["id"],
|
||||||
|
job["domain"],
|
||||||
|
job["status"],
|
||||||
|
json.dumps(job.get("timeline") or [], ensure_ascii=False),
|
||||||
|
int(job.get("elapsed_vm112") or 0),
|
||||||
|
json.dumps(job.get("desk") or {}, ensure_ascii=False),
|
||||||
|
json.dumps(job.get("vm112") or {}, ensure_ascii=False),
|
||||||
|
job.get("error"),
|
||||||
|
job.get("by"),
|
||||||
|
job.get("created_at") or _now(),
|
||||||
|
job["updated_at"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
return dict(job)
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
return _sqlite_retry(_mutate)
|
||||||
|
|
||||||
|
|
||||||
def _upsert_step(job_id: str, step: dict[str, str]) -> None:
|
def _upsert_step(job_id: str, step: dict[str, str]) -> None:
|
||||||
|
|
@ -143,12 +235,13 @@ def _set_job(job_id: str, **fields: Any) -> None:
|
||||||
|
|
||||||
|
|
||||||
def create_job(domain: str, username: str) -> str:
|
def create_job(domain: str, username: str) -> str:
|
||||||
|
domain = domain.lower().strip()
|
||||||
job_id = uuid.uuid4().hex[:16]
|
job_id = uuid.uuid4().hex[:16]
|
||||||
now = _now()
|
now = _now()
|
||||||
job = {
|
job = {
|
||||||
"id": job_id,
|
"id": job_id,
|
||||||
"job_id": job_id,
|
"job_id": job_id,
|
||||||
"domain": domain.lower().strip(),
|
"domain": domain,
|
||||||
"status": "queued",
|
"status": "queued",
|
||||||
"timeline": [],
|
"timeline": [],
|
||||||
"elapsed_vm112": 0,
|
"elapsed_vm112": 0,
|
||||||
|
|
@ -159,8 +252,43 @@ def create_job(domain: str, username: str) -> str:
|
||||||
"created_at": now,
|
"created_at": now,
|
||||||
"updated_at": now,
|
"updated_at": now,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _create() -> None:
|
||||||
|
conn = auth.db()
|
||||||
|
try:
|
||||||
|
active = _find_active_job_locked(conn, domain)
|
||||||
|
if active:
|
||||||
|
raise PurgeJobConflictError(
|
||||||
|
f"Purge já em curso para {domain} (job {active['id']}, status {active['status']})"
|
||||||
|
)
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
INSERT INTO vm112_purge_jobs (
|
||||||
|
id, domain, status, timeline_json, elapsed_vm112,
|
||||||
|
desk_json, vm112_json, error, by_user, created_at, updated_at
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
job["id"],
|
||||||
|
job["domain"],
|
||||||
|
job["status"],
|
||||||
|
"[]",
|
||||||
|
0,
|
||||||
|
"{}",
|
||||||
|
"{}",
|
||||||
|
None,
|
||||||
|
job["by"],
|
||||||
|
job["created_at"],
|
||||||
|
job["updated_at"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
with _lock:
|
with _lock:
|
||||||
_persist_job(job)
|
_ensure_schema()
|
||||||
|
_sqlite_retry(_create)
|
||||||
return job_id
|
return job_id
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -140,6 +140,16 @@ const DeskServices = (() => {
|
||||||
let errText = typeof detail === 'string' ? detail : JSON.stringify(detail || `${res.status}`);
|
let errText = typeof detail === 'string' ? detail : JSON.stringify(detail || `${res.status}`);
|
||||||
if (res.status === 504) {
|
if (res.status === 504) {
|
||||||
errText = '504 Gateway Timeout — o purge pode demorar vários minutos. Verifique na VM112 se concluiu antes de repetir.';
|
errText = '504 Gateway Timeout — o purge pode demorar vários minutos. Verifique na VM112 se concluiu antes de repetir.';
|
||||||
|
} else if (res.status === 409) {
|
||||||
|
errText = typeof detail === 'string'
|
||||||
|
? detail
|
||||||
|
: 'Purge já em curso para este domínio — aguarde a conclusão no drawer lateral.';
|
||||||
|
} else if (res.status === 503) {
|
||||||
|
errText = typeof detail === 'string'
|
||||||
|
? detail
|
||||||
|
: 'Servidor ocupado — aguarde alguns segundos e tente novamente.';
|
||||||
|
} else if (res.status === 500 && errText === '500') {
|
||||||
|
errText = 'Erro interno ao iniciar purge — verifique logs da API Desk (VM122).';
|
||||||
}
|
}
|
||||||
throw new Error(errText);
|
throw new Error(errText);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue