Mounts agents router and schema init, adds VM123 checks, chat copilot, Desk UI module, isolated docker-compose staging on ports 8180/8192, and full spec documentation without touching production ports. Co-authored-by: Cursor <cursoragent@cursor.com>
266 lines
8.7 KiB
Python
266 lines
8.7 KiB
Python
"""T0/T1 checks — Spec 029."""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sqlite3
|
|
import time
|
|
|
|
import httpx
|
|
|
|
# ---------------------------------------------------------------------------
# Runtime configuration. Every value can be overridden through the
# environment variable named in the lookup; the second argument is the
# fallback used when that variable is unset.
# ---------------------------------------------------------------------------

# Base URL of the Desk deployment probed by check_desk_api_health().
DESK = os.environ.get("DESK_PUBLIC_URL", "https://desk.ligbox.com.br")

# VM112 API base URL and the public onboarding portal, both probed by
# check_vm112_health().
VM112 = os.environ.get("VM112_API_URL", "http://10.10.10.112:8090")
WIZARD = os.environ.get("WIZARD_ONBOARD_URL", "https://onboard.ligbox.com.br/onboard")

# pfSense status endpoint and the credentials passed as `auth` by
# check_pfsense_api().
PFS_URL = os.environ.get("PFSENSE_API_URL", "https://firewall.itecnologys.com/api/v2/status/system")
PFS_USER = os.environ.get("PFSENSE_API_USER", "api_cursor")
# NOTE(review): a literal password is committed as the fallback value here.
# It is kept unchanged to preserve behaviour — consider supplying it only via
# PFSENSE_API_PASSWORD and rotating the credential.
PFS_PASS = os.environ.get("PFSENSE_API_PASSWORD", "805353")

# Proxmox API settings used by check_proxmox_cluster(). An empty PVE_PASS
# makes that check return no findings.
PVE = os.environ.get("PVE_API_URL", "https://10.10.10.2:8006/api2/json")
PVE_USER = os.environ.get("PVE_USER", "root@pam")
PVE_PASS = os.environ.get("PVE_PASSWORD", "")
PVE_NODE = os.environ.get("PVE_NODE", "big1")

# Comma-separated VM ids to verify on PVE_NODE; blank entries are skipped.
VMIDS = [
    int(raw_id)
    for raw_id in os.environ.get("AGENTIC_CRITICAL_VMIDS", "112,122,123,104").split(",")
    if raw_id.strip()
]

# VM123 services. Trailing slashes are stripped so paths can be appended
# with a plain "/...".
OLLAMA = os.environ.get("OLLAMA_BASE_URL", "http://10.10.10.123:11434").rstrip("/")
VM123_IP = os.environ.get("VM123_IP", "10.10.10.123")
OPENPANEL_BRIDGE = os.environ.get("OPENPANEL_BRIDGE_URL", f"http://{VM123_IP}:18087").rstrip("/")
|
|
|
|
|
|
def _http(url, *, auth=None, max_ms=2500):
    """Send one GET request to *url* and summarise the outcome.

    Args:
        url: Address to request.
        auth: Optional credentials forwarded to ``httpx`` as ``auth``.
        max_ms: Latency budget in milliseconds; slower answers are not "ok".

    Returns:
        On a completed request, a dict with ``ok`` (True only for HTTP 200
        answered within ``max_ms``), ``status_code``, ``latency_ms`` and
        ``url``. If anything raises, a dict with ``ok`` False, ``error``
        (the exception text) and ``url``.
    """
    started = time.perf_counter()
    try:
        # Certificate verification is disabled and redirects are followed;
        # the client gives up after a 15 second timeout.
        with httpx.Client(timeout=15, verify=False, follow_redirects=True) as session:
            response = session.get(url, auth=auth)
            elapsed_ms = int((time.perf_counter() - started) * 1000)
    except Exception as exc:
        # Any failure is reported as a failed probe instead of propagating.
        return {"ok": False, "error": str(exc), "url": url}

    within_budget = elapsed_ms <= max_ms
    return {
        "ok": response.status_code == 200 and within_budget,
        "status_code": response.status_code,
        "latency_ms": elapsed_ms,
        "url": url,
    }
|
|
|
|
|
|
def check_desk_api_health():
    """Probe ``{DESK}/api/health`` with a 4000 ms latency budget.

    Returns:
        An empty list when the probe is ok, otherwise a list holding one
        high-severity finding whose evidence is the probe result.
    """
    probe = _http(f"{DESK}/api/health", max_ms=4000)
    if probe["ok"]:
        return []

    finding = {
        "severity": "high",
        "category": "api",
        "title": "Desk API health falhou",
        "detail_md": str(probe),
        "evidence": probe,
        "human_action": "Verificar docker-compose api VM122",
    }
    return [finding]
|
|
|
|
|
|
def check_vm112_health():
    """Probe the VM112 onboarding API and the public onboarding portal.

    Both probes always run, in that order; each failed probe contributes one
    finding.

    Returns:
        A list with zero, one or two findings.
    """
    # (url, extra _http options, severity, title, suggested action)
    probes = (
        (
            f"{VM112}/api/onboarding/health",
            {},  # default latency budget of _http
            "high",
            "VM112 API down",
            "systemctl ligbox-wizard VM112",
        ),
        (
            WIZARD,
            {"max_ms": 4000},
            "warn",
            "Portal /onboard falhou",
            "Traefik CT114 + VM112",
        ),
    )

    findings = []
    for url, options, severity, title, action in probes:
        result = _http(url, **options)
        if result["ok"]:
            continue
        findings.append(
            {
                "severity": severity,
                "category": "api",
                "title": title,
                "detail_md": str(result),
                "evidence": result,
                "human_action": action,
            }
        )
    return findings
|
|
|
|
|
|
def check_pfsense_api():
    """Probe the pfSense status endpoint using the configured credentials.

    The request goes to ``PFS_URL`` with ``(PFS_USER, PFS_PASS)`` as auth and
    a 4000 ms latency budget.

    Returns:
        An empty list when the probe is ok, otherwise a list holding one
        warn-severity finding whose evidence is the probe result.
    """
    credentials = (PFS_USER, PFS_PASS)
    probe = _http(PFS_URL, auth=credentials, max_ms=4000)
    if probe["ok"]:
        return []

    finding = {
        "severity": "warn",
        "category": "infra",
        "title": "pfSense API falhou",
        "detail_md": str(probe),
        "evidence": probe,
        "human_action": "Validar firewall.itecnologys.com via Traefik",
    }
    return [finding]
|
|
|
|
|
|
def check_funnel_stuck(conn, max_stuck=5):
    """Report onboarding tickets that have stayed unresolved for over a day.

    Counts rows in ``tickets`` whose status is open/assisting/escalated,
    whose subject or payload mentions "onboarding", and whose ``created_at``
    is more than 24 hours old.

    Args:
        conn: SQLite connection (or compatible object exposing ``execute``).
            Works with the default tuple rows as well as with
            ``sqlite3.Row`` or dict-style row factories.
        max_stuck: Highest count that is still considered healthy.

    Returns:
        An empty list when the count is at most ``max_stuck`` or when the
        query fails with ``sqlite3.OperationalError``; otherwise a list
        holding one warn-severity finding.
    """
    query = (
        "SELECT COUNT(*) n FROM tickets WHERE status IN ('open','assisting','escalated') "
        "AND (subject LIKE '%onboarding%' OR payload LIKE '%onboarding%') "
        "AND datetime(created_at)<datetime('now','-24 hours')"
    )
    try:
        row = conn.execute(query).fetchone()
    except sqlite3.OperationalError:
        # Best-effort: a query the database cannot run yields no finding.
        return []

    # Mapping-like rows (sqlite3.Row, dict factories) are read through the
    # "n" alias; plain tuples only support positional access, and indexing
    # them with a string would raise TypeError.
    stuck = row["n"] if hasattr(row, "keys") else row[0]
    if stuck <= max_stuck:
        return []

    return [
        {
            "severity": "warn",
            "category": "code",
            "title": f"Funil travado {stuck} tickets",
            "detail_md": str(stuck),
            "evidence": {"count": stuck},
            "human_action": "Rever tickets onboarding — Spec 010 Assist",
        }
    ]
|
|
|
|
|
|
def check_integration_gap(ops_api_url, token, max_gap_minutes=15):
    """Report a long silence in VM112 onboarding events.

    Fetches ``{ops_api_url}/api/v1/integrations/health`` with the internal
    token header and reads ``vm112_onboard.gap_minutes`` from the JSON body.

    Args:
        ops_api_url: Base URL of the ops API.
        token: Value sent as ``X-Ops-Internal-Token``; when falsy the check
            is skipped.
        max_gap_minutes: Largest gap (in whole minutes) still considered
            healthy. Defaults to 15.

    Returns:
        A list holding one high-severity finding when the reported gap
        exceeds ``max_gap_minutes``. An empty list in every other case:
        no token, non-200 answer, missing gap value, or any exception.
    """
    if not token:
        return []

    try:
        with httpx.Client(timeout=15) as client:
            response = client.get(
                f"{ops_api_url}/api/v1/integrations/health",
                headers={"X-Ops-Internal-Token": token},
            )
        if response.status_code != 200:
            return []

        # "vm112_onboard" may be absent or null; treat both as "no data".
        onboard = response.json().get("vm112_onboard") or {}
        gap = onboard.get("gap_minutes")
        if gap is None or int(gap) <= max_gap_minutes:
            return []

        return [
            {
                "severity": "high",
                "category": "infra",
                "title": f"Gap webhook {int(gap)}min",
                "detail_md": "VM112 sem eventos recentes",
                "evidence": {"gap": gap},
                "human_action": "Webhooks VM112→122",
            }
        ]
    except Exception:
        # Deliberately best-effort: transport errors, malformed JSON or a
        # non-numeric gap produce no finding rather than failing the run.
        return []
|
|
|
|
|
|
def check_proxmox_cluster():
    """Check that every VM listed in ``VMIDS`` reports status "running".

    Logs in at ``{PVE}/access/ticket`` with ``PVE_USER``/``PVE_PASS``, then
    queries ``status/current`` for each VM id on ``PVE_NODE``, sending the
    ticket as a ``PVEAuthCookie`` cookie.

    Returns:
        * ``[]`` when ``PVE_PASS`` is empty or all VMs are running.
        * One warn-severity finding when the login does not return HTTP 200.
        * One critical-severity finding listing every VM that is not
          running (a non-200 status answer is recorded as "error").
        * One info-severity finding when anything raises along the way.
    """
    if not PVE_PASS:
        return []

    try:
        # Certificate verification is disabled for both Proxmox clients.
        with httpx.Client(timeout=15, verify=False) as login_client:
            login = login_client.post(
                f"{PVE}/access/ticket",
                data={"username": PVE_USER, "password": PVE_PASS},
            )
        if login.status_code != 200:
            return [
                {
                    "severity": "warn",
                    "category": "infra",
                    "title": "Proxmox auth falhou",
                    "detail_md": str(login.status_code),
                    "evidence": {},
                    "human_action": "PVE 10.10.10.2:8006",
                }
            ]

        ticket = login.json()["data"]["ticket"]
        cookie_header = {"Cookie": f"PVEAuthCookie={ticket}"}

        not_running = []
        with httpx.Client(timeout=15, verify=False) as session:
            for vmid in VMIDS:
                answer = session.get(
                    f"{PVE}/nodes/{PVE_NODE}/qemu/{vmid}/status/current",
                    headers=cookie_header,
                )
                if answer.status_code == 200:
                    state = answer.json().get("data", {}).get("status")
                else:
                    state = "error"
                if state != "running":
                    not_running.append({"vmid": vmid, "status": state})

        if not_running:
            return [
                {
                    "severity": "critical",
                    "category": "infra",
                    "title": f"VMs paradas {not_running}",
                    "detail_md": str(not_running),
                    "evidence": {"bad": not_running},
                    "human_action": "qm start no big1",
                }
            ]
        return []
    except Exception as exc:
        # Any failure of the check itself is surfaced as an info finding.
        return [
            {
                "severity": "info",
                "category": "infra",
                "title": "Proxmox check erro",
                "detail_md": str(exc),
                "evidence": {},
                "human_action": "",
            }
        ]
|
|
|
|
|
|
def check_ollama_vm123():
    """Probe ``{OLLAMA}/api/tags`` with a 5000 ms latency budget.

    Returns:
        An empty list when the probe is ok, otherwise a list holding one
        high-severity finding whose evidence is the probe result.
    """
    probe = _http(f"{OLLAMA}/api/tags", max_ms=5000)
    if probe["ok"]:
        return []

    finding = {
        "severity": "high",
        "category": "infra",
        "title": "Ollama VM123 offline",
        "detail_md": str(probe),
        "evidence": probe,
        "human_action": "systemctl start ollama VM123",
    }
    return [finding]
|
|
|
|
|
|
def check_vm123_finance_stack():
    """Probe the two finance services on ``VM123_IP``.

    Port 8092 (root path) and port 8069 (``/web/login``) are requested in
    that order, each with a 5000 ms latency budget. Both probes always run.

    Returns:
        A list with one finding per failed probe (possibly empty).
    """
    # (url, severity, title, suggested action)
    services = (
        (
            f"http://{VM123_IP}:8092/",
            "high",
            "FOSSBilling VM123 down",
            "docker compose VM123 finance stack",
        ),
        (
            f"http://{VM123_IP}:8069/web/login",
            "warn",
            "Odoo VM123 inacessível",
            "Verificar container Odoo VM123",
        ),
    )

    findings = []
    for url, severity, title, action in services:
        result = _http(url, max_ms=5000)
        if result["ok"]:
            continue
        findings.append(
            {
                "severity": severity,
                "category": "api",
                "title": title,
                "detail_md": str(result),
                "evidence": result,
                "human_action": action,
            }
        )
    return findings
|
|
|
|
|
|
def check_vm123_openpanel_bridge():
    """Probe the OpenPanel bridge, falling back to its base URL on 404.

    ``{OPENPANEL_BRIDGE}/health`` is tried first; only when that answers
    HTTP 404 is the bare ``OPENPANEL_BRIDGE`` URL requested instead. Both
    requests use a 4000 ms latency budget.

    Returns:
        An empty list when the last probe is ok, otherwise a list holding
        one warn-severity finding whose evidence is that probe result.
    """
    probe = _http(f"{OPENPANEL_BRIDGE}/health", max_ms=4000)
    # Failed requests carry no "status_code" key, hence .get().
    health_route_missing = probe.get("status_code") == 404
    if health_route_missing:
        probe = _http(OPENPANEL_BRIDGE, max_ms=4000)

    if probe["ok"]:
        return []

    finding = {
        "severity": "warn",
        "category": "api",
        "title": "OpenPanel bridge VM123 falhou",
        "detail_md": str(probe),
        "evidence": probe,
        "human_action": f"Bridge {OPENPANEL_BRIDGE}",
    }
    return [finding]
|
|
|
|
|
|
# Scenario id -> runner. Every runner is called as ``runner(conn, **kwargs)``
# and returns a list of findings. Most checks ignore both arguments; the
# adapters exist so all scenarios share one call signature.
SCENARIO_RUNNERS = {
    # HTTP probes that need neither the connection nor keyword options.
    "desk.api.health": lambda conn, **_: check_desk_api_health(),
    "wizard.vm112.bundle": lambda conn, **_: check_vm112_health(),
    "pfsense.api.system": lambda conn, **_: check_pfsense_api(),
    # Reads the tickets table through the supplied connection.
    "funnel.stuck.onboarding": lambda conn, **_: check_funnel_stuck(conn),
    # Takes the ops API URL and internal token from the keyword options,
    # defaulting each to an empty string.
    "integration.webhook.gap": lambda conn, **opts: check_integration_gap(
        opts.get("ops_api_url", ""),
        opts.get("internal_token", ""),
    ),
    "proxmox.cluster": lambda conn, **_: check_proxmox_cluster(),
    # VM123 services.
    "ollama.vm123.health": lambda conn, **_: check_ollama_vm123(),
    "vm123.finance.stack": lambda conn, **_: check_vm123_finance_stack(),
    "vm123.openpanel.bridge": lambda conn, **_: check_vm123_openpanel_bridge(),
}
|