Specs stay at repo root (cross-VM). Move deploy and code into logical projects with README per domain, updated manifest.yaml, and symlinks at legacy paths for VM122 backward compatibility.
107 lines
3.3 KiB
Python
107 lines
3.3 KiB
Python
"""Integration health checks — Spec 014 SOC lite."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
# Base URL of the VM112 API; override with the VM112_API_URL environment variable.
VM112_API = os.environ.get("VM112_API_URL", "http://10.10.10.112:8090")
# Minutes allowed since the last onboarding webhook before a warning is raised.
WEBHOOK_GAP_ALERT_MIN = int(os.environ.get("WEBHOOK_GAP_ALERT_MIN", "15"))
# Value of the webhook ``source`` column used to look up onboarding events.
ONBOARD_SOURCE = "vm112-onboard"
|
|
|
|
|
|
def _parse_payload(raw: str | None) -> dict:
    """Decode a stored webhook payload into a dict.

    Args:
        raw: JSON text of the payload, or ``None``/empty when there is none.

    Returns:
        The decoded JSON object. An empty dict is returned when ``raw`` is
        empty, is not valid JSON, or decodes to anything other than a JSON
        object, so callers can always call ``.get()`` on the result.
    """
    if not raw:
        return {}
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # Valid JSON may also be a list, string, number, bool or null; only an
    # object satisfies the declared ``dict`` return type.
    return decoded if isinstance(decoded, dict) else {}
|
|
|
|
|
|
def _minutes_since(iso_ts: str | None) -> float | None:
    """Return the number of minutes elapsed since an ISO-format timestamp.

    Every ``Z`` in the text is rewritten to ``+00:00`` before parsing, and a
    timestamp that carries no UTC offset is treated as UTC.

    Args:
        iso_ts: Timestamp text, or ``None``/empty.

    Returns:
        Elapsed minutes as a float (negative for timestamps in the future),
        or ``None`` when the input is empty or cannot be parsed.
    """
    if not iso_ts:
        return None
    normalized = iso_ts.replace("Z", "+00:00")
    try:
        moment = datetime.fromisoformat(normalized)
        # Naive values are assumed to be UTC so the subtraction below never
        # mixes naive and aware datetimes.
        aware = moment if moment.tzinfo is not None else moment.replace(tzinfo=timezone.utc)
        elapsed = datetime.now(timezone.utc) - aware
        return elapsed.total_seconds() / 60.0
    except ValueError:
        return None
|
|
|
|
|
|
def last_webhook_for_source(conn, source: str) -> dict[str, Any] | None:
    """Summarize the newest ``webhook_events`` row recorded for *source*.

    Args:
        conn: Database connection exposing ``execute()``; the returned row
            must support lookup by column name.
        source: Value matched against the ``source`` column.

    Returns:
        ``None`` when no row matches. Otherwise a dict holding the event
        type, the ``domain`` and ``session_id`` read from the JSON payload,
        the stored ``created_at`` value and its age in minutes.
    """
    latest_sql = """
        SELECT event_type, payload, created_at
        FROM webhook_events
        WHERE source = ?
        ORDER BY id DESC
        LIMIT 1
        """
    cursor = conn.execute(latest_sql, (source,))
    record = cursor.fetchone()
    if not record:
        return None

    details = _parse_payload(record["payload"])
    event_type = record["event_type"]
    domain = details.get("domain")
    session_id = details.get("session_id")
    created_at = record["created_at"]
    return {
        "event": event_type,
        "domain": domain,
        "session_id": session_id,
        "created_at": created_at,
        "minutes_ago": _minutes_since(created_at),
    }
|
|
|
|
|
|
def vm112_reachable() -> dict[str, Any]:
    """Probe the VM112 onboarding health endpoint.

    Sends ``GET {VM112_API}/api/onboarding/health`` with a 6 second timeout.

    Returns:
        When a response is received: ``reachable`` (``True`` only for HTTP
        200), ``http_status`` and ``body``. ``body`` is the decoded JSON when
        the response declares a JSON content type and the body parses;
        otherwise it is the first 120 characters of the response text.
        When the request fails: ``reachable`` is ``False``, ``http_status``
        is ``None`` and ``error`` carries the exception text.
    """
    try:
        with httpx.Client(timeout=6.0) as client:
            response = client.get(f"{VM112_API}/api/onboarding/health")
            # Media types are case-insensitive, so compare in lower case.
            content_type = response.headers.get("content-type", "").lower()
            declares_json = content_type.startswith("application/json")
            body: Any = None
            if declares_json:
                try:
                    body = response.json()
                except ValueError:
                    # The server answered but sent a malformed JSON body.
                    # Keep the real HTTP status and fall back to the text
                    # snippet instead of reporting the host as unreachable.
                    declares_json = False
            if not declares_json:
                body = response.text[:120]
            return {
                "reachable": response.status_code == 200,
                "http_status": response.status_code,
                "body": body,
            }
    except Exception as exc:
        # Deliberately broad: a health probe must report failure, never raise.
        return {"reachable": False, "http_status": None, "error": str(exc)}
|
|
|
|
|
|
def build_health_report(conn) -> dict[str, Any]:
    """Assemble the VM112 onboarding integration health report.

    Looks up the latest onboarding webhook through ``conn``, probes the VM112
    API, and derives alerts from both:

    * ``critical`` when the VM112 API probe is not reachable;
    * ``warn`` when no onboarding webhook has been stored, or when the latest
      one is older than ``WEBHOOK_GAP_ALERT_MIN`` minutes.

    Args:
        conn: Database connection handed to ``last_webhook_for_source``.

    Returns:
        A dict with the overall ``status`` (``"ok"``, ``"degraded"`` or
        ``"critical"``), the configured gap threshold, the onboarding
        details, the list of alerts and the UTC time of the check.
    """
    latest = last_webhook_for_source(conn, ONBOARD_SOURCE)
    if latest:
        gap = latest.get("minutes_ago")
    else:
        gap = None
    api_probe = vm112_reachable()

    findings: list[dict[str, str]] = []
    if not api_probe.get("reachable"):
        findings.append({"level": "critical", "message": "VM112 wizard inacessível"})
    if latest is None:
        findings.append({"level": "warn", "message": "Nenhum webhook VM112 recebido ainda"})
    elif gap is not None and gap > WEBHOOK_GAP_ALERT_MIN:
        stale_message = f"Sem webhook VM112 há {int(gap)} min (limite {WEBHOOK_GAP_ALERT_MIN} min)"
        findings.append({"level": "warn", "message": stale_message})

    # The most severe alert level decides the overall status.
    levels = {finding["level"] for finding in findings}
    if "critical" in levels:
        overall = "critical"
    elif findings:
        overall = "degraded"
    else:
        overall = "ok"

    onboard_section = {
        "source": ONBOARD_SOURCE,
        "last_webhook": latest,
        "gap_minutes": gap,
        "vm112_api": api_probe,
    }
    return {
        "status": overall,
        "webhook_gap_alert_minutes": WEBHOOK_GAP_ALERT_MIN,
        "vm112_onboard": onboard_section,
        "alerts": findings,
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }
|