107 lines
3.3 KiB
Python
107 lines
3.3 KiB
Python
"""Integration health checks — Spec 014 SOC lite."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
# Base URL of the VM112 API probed by the health check; override with the
# VM112_API_URL environment variable.
VM112_API = os.environ.get("VM112_API_URL", "http://10.10.10.112:8090")

# Maximum tolerated number of minutes since the last onboarding webhook before
# the report raises a warning; override with WEBHOOK_GAP_ALERT_MIN (integer).
WEBHOOK_GAP_ALERT_MIN = int(os.environ.get("WEBHOOK_GAP_ALERT_MIN", "15"))

# Value of webhook_events.source that identifies VM112 onboarding events.
ONBOARD_SOURCE = "vm112-onboard"
|
|
|
|
|
|
def _parse_payload(raw: str | None) -> dict:
    """Decode a JSON-encoded webhook payload into a dict.

    Args:
        raw: JSON text of the payload, or ``None``/empty when there is none.

    Returns:
        The decoded JSON object. An empty dict is returned when *raw* is
        empty, cannot be decoded, or decodes to anything other than a JSON
        object (``null``, a list, a string, a number, ...), so callers can
        always use ``.get()`` on the result.
    """
    if not raw:
        return {}
    try:
        decoded = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; catching the base
        # class also covers undecodable bytes (UnicodeDecodeError).
        return {}
    # Valid JSON is not necessarily an object; honour the ``dict`` contract.
    return decoded if isinstance(decoded, dict) else {}
|
|
|
|
|
|
def _minutes_since(iso_ts: str | None) -> float | None:
    """Return how many minutes have elapsed since an ISO-8601 timestamp.

    Args:
        iso_ts: Timestamp text accepted by ``datetime.fromisoformat`` after
            every ``"Z"`` has been rewritten to ``"+00:00"``. Values without
            a UTC offset are treated as UTC.

    Returns:
        Elapsed minutes as a float (measured against the current UTC time),
        or ``None`` when *iso_ts* is empty or cannot be parsed.
    """
    if not iso_ts:
        return None

    normalized = iso_ts.replace("Z", "+00:00")
    try:
        moment = datetime.fromisoformat(normalized)
    except ValueError:
        return None

    # Naive timestamps carry no offset; interpret them as UTC so they can be
    # subtracted from the timezone-aware "now".
    if moment.tzinfo is None:
        moment = moment.replace(tzinfo=timezone.utc)

    elapsed = datetime.now(timezone.utc) - moment
    return elapsed.total_seconds() / 60.0
|
|
|
|
|
|
def last_webhook_for_source(conn, source: str) -> dict[str, Any] | None:
    """Summarise the most recent ``webhook_events`` row for *source*.

    "Most recent" means the row with the highest ``id``.

    Args:
        conn: Database connection exposing ``execute(sql, params)`` with
            ``?`` placeholders and rows indexable by column name
            (presumably sqlite3 with ``sqlite3.Row`` - confirm at the caller).
        source: Value matched against the ``source`` column.

    Returns:
        ``None`` when no row exists for *source*; otherwise a dict with the
        keys ``event``, ``domain``, ``session_id``, ``created_at`` and
        ``minutes_ago``. ``domain`` and ``session_id`` come from the decoded
        JSON payload and are ``None`` when absent.
    """
    query = """
        SELECT event_type, payload, created_at
        FROM webhook_events
        WHERE source = ?
        ORDER BY id DESC
        LIMIT 1
        """
    cursor = conn.execute(query, (source,))
    latest = cursor.fetchone()
    if not latest:
        return None

    created_at = latest["created_at"]
    details = _parse_payload(latest["payload"])
    summary = {
        "event": latest["event_type"],
        "domain": details.get("domain"),
        "session_id": details.get("session_id"),
        "created_at": created_at,
        "minutes_ago": _minutes_since(created_at),
    }
    return summary
|
|
|
|
|
|
def vm112_reachable() -> dict[str, Any]:
    """Probe the VM112 onboarding health endpoint.

    Sends ``GET {VM112_API}/api/onboarding/health`` with a 6-second timeout.

    Returns:
        When the server answered: ``{"reachable", "http_status", "body"}``,
        where ``reachable`` is true only for HTTP 200 and ``body`` is the
        decoded JSON (for ``application/json`` responses) or the first 120
        characters of the response text.
        When the request itself failed:
        ``{"reachable": False, "http_status": None, "error": <message>}``.
    """
    # Tolerate a base URL configured with a trailing slash.
    url = f"{VM112_API.rstrip('/')}/api/onboarding/health"

    try:
        with httpx.Client(timeout=6.0) as client:
            response = client.get(url)
    except Exception as exc:
        # Deliberately broad: a health probe must report failure, not raise.
        return {"reachable": False, "http_status": None, "error": str(exc)}

    content_type = response.headers.get("content-type", "")
    if content_type.startswith("application/json"):
        try:
            body: Any = response.json()
        except ValueError:
            # The server responded but the JSON body is malformed. That is
            # not a connectivity failure, so keep the real status code and
            # fall back to a text excerpt.
            body = response.text[:120]
    else:
        body = response.text[:120]

    return {
        "reachable": response.status_code == 200,
        "http_status": response.status_code,
        "body": body,
    }
|
|
|
|
|
|
def build_health_report(conn) -> dict[str, Any]:
    """Assemble the VM112 onboarding integration health report.

    Looks up the latest onboarding webhook, probes the VM112 API, and turns
    the findings into alerts plus an overall status.

    Args:
        conn: Database connection forwarded to ``last_webhook_for_source``.

    Returns:
        A dict with the keys ``status`` (``"critical"`` if any critical alert
        was raised, ``"degraded"`` if only other alerts were raised, ``"ok"``
        otherwise), ``webhook_gap_alert_minutes``, ``vm112_onboard``,
        ``alerts`` and ``checked_at`` (current UTC time in ISO format).
    """
    last_onboard = last_webhook_for_source(conn, ONBOARD_SOURCE)
    if last_onboard:
        gap_min = last_onboard.get("minutes_ago")
    else:
        gap_min = None

    vm112 = vm112_reachable()

    alerts: list[dict[str, str]] = []

    # An unreachable API is the only condition treated as critical.
    if not vm112.get("reachable"):
        alerts.append({"level": "critical", "message": "VM112 wizard inacessível"})

    if last_onboard is None:
        alerts.append({"level": "warn", "message": "Nenhum webhook VM112 recebido ainda"})
    elif gap_min is not None and gap_min > WEBHOOK_GAP_ALERT_MIN:
        gap_message = f"Sem webhook VM112 há {int(gap_min)} min (limite {WEBHOOK_GAP_ALERT_MIN} min)"
        alerts.append({"level": "warn", "message": gap_message})

    # Overall status is driven by the most severe alert level present.
    levels = {alert["level"] for alert in alerts}
    if "critical" in levels:
        status = "critical"
    elif levels:
        status = "degraded"
    else:
        status = "ok"

    onboard_section = {
        "source": ONBOARD_SOURCE,
        "last_webhook": last_onboard,
        "gap_minutes": gap_min,
        "vm112_api": vm112,
    }
    return {
        "status": status,
        "webhook_gap_alert_minutes": WEBHOOK_GAP_ALERT_MIN,
        "vm112_onboard": onboard_section,
        "alerts": alerts,
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }
|