ligbox-ops-platform/projects/ops-desk/api/app/agents/store.py
Ligbox Spec Hub fd491e5859 Implement Spec 030 Agentic Ops Mission Board (UI-A/B/C).
Add agent_incidents dedup, overview/incidents/timeline API, mission board UI with fleet rail, kanban, context panel, mobile tabs, poll and keyboard shortcuts.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-20 06:49:38 +00:00

366 lines
16 KiB
Python

"""Persistence Agentic Ops."""
from __future__ import annotations
import json, sqlite3, time
from datetime import datetime, timezone
from typing import Any
from app.agents.messages import init_messages_schema
def _now():
    """Return the current UTC time as an ISO-8601 string (with UTC offset)."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def _exec_retry(conn, sql, params=(), *, attempts=8):
    """Execute *sql* on *conn*, retrying while SQLite reports a lock.

    Only ``sqlite3.OperationalError`` whose message contains "locked"
    (case-insensitive) is retried; any other error, or a lock error on the
    final attempt, is re-raised. The pause between tries grows linearly
    (0.25s, 0.5s, 0.75s, ...).

    Returns the cursor produced by ``conn.execute``; returns ``None`` when
    *attempts* is zero or negative because the statement is never run.
    """
    final_try = attempts - 1
    tries = 0
    while tries < attempts:
        try:
            return conn.execute(sql, params)
        except sqlite3.OperationalError as exc:
            lock_error = "locked" in str(exc).lower()
            if tries >= final_try or not lock_error:
                raise
            # Linear backoff before the next attempt.
            time.sleep(0.25 * (tries + 1))
        tries += 1
    return None
def init_agent_schema(conn):
    """Create the Agentic Ops tables and indexes when they do not exist.

    Steps, in order:
      1. Initialise the messages schema via ``init_messages_schema``.
      2. Create ``agent_scenarios``, ``agent_runs``, ``agent_findings``,
         ``agent_action_log``, ``agent_kb_chunks`` and ``agent_incidents``
         plus their indexes (all ``IF NOT EXISTS``).
      3. When ``agent_incidents`` is empty, run ``backfill_incidents``.
         A ``sqlite3.OperationalError`` raised during this step is
         deliberately ignored (best-effort backfill).
    """
    init_messages_schema(conn)
    schema_sql = """
CREATE TABLE IF NOT EXISTS agent_scenarios (
id TEXT PRIMARY KEY, title TEXT NOT NULL, schedule TEXT,
severity_default TEXT NOT NULL DEFAULT 'warn', config_json TEXT NOT NULL,
enabled INTEGER NOT NULL DEFAULT 1, updated_at TEXT NOT NULL);
CREATE TABLE IF NOT EXISTS agent_runs (
id INTEGER PRIMARY KEY, scenario_id TEXT NOT NULL, trigger TEXT NOT NULL DEFAULT 'cron',
status TEXT NOT NULL, summary_text TEXT, llm_model TEXT, metadata_json TEXT,
started_at TEXT NOT NULL, finished_at TEXT);
CREATE TABLE IF NOT EXISTS agent_findings (
id INTEGER PRIMARY KEY, run_id INTEGER NOT NULL, severity TEXT NOT NULL,
category TEXT NOT NULL DEFAULT 'api', title TEXT NOT NULL, detail_md TEXT,
evidence_json TEXT, suggested_human_action TEXT, kb_refs_json TEXT,
acknowledged_at TEXT, acknowledged_by TEXT, created_at TEXT NOT NULL);
CREATE TABLE IF NOT EXISTS agent_action_log (
id INTEGER PRIMARY KEY, ts TEXT NOT NULL, agent_id TEXT NOT NULL DEFAULT 'sentinel',
run_id INTEGER, event_type TEXT NOT NULL, message TEXT, payload_json TEXT);
CREATE TABLE IF NOT EXISTS agent_kb_chunks (
id INTEGER PRIMARY KEY, source_path TEXT NOT NULL, chunk_text TEXT NOT NULL, indexed_at TEXT NOT NULL);
CREATE INDEX IF NOT EXISTS idx_agent_runs_scenario ON agent_runs(scenario_id);
CREATE TABLE IF NOT EXISTS agent_incidents (
id INTEGER PRIMARY KEY,
scenario_id TEXT NOT NULL UNIQUE,
primary_agent TEXT NOT NULL,
severity TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'open',
title TEXT NOT NULL,
latest_finding_id INTEGER,
occurrence_count INTEGER NOT NULL DEFAULT 1,
first_seen_at TEXT NOT NULL,
last_seen_at TEXT NOT NULL,
suggested_human_action TEXT,
thread_id INTEGER,
acknowledged_at TEXT,
acknowledged_by TEXT);
CREATE INDEX IF NOT EXISTS idx_agent_incidents_status ON agent_incidents(status);
"""
    conn.executescript(schema_sql)
    try:
        incident_total = conn.execute("SELECT COUNT(*) FROM agent_incidents").fetchone()[0]
        if incident_total == 0:
            # Empty incidents table: seed it from findings that are still open.
            backfill_incidents(conn)
    except sqlite3.OperationalError:
        # Best effort only; schema creation above has already succeeded.
        pass
def log_event(conn, *, event_type, message, agent_id="sentinel", run_id=None, payload=None):
    """Append an event to ``agent_action_log`` and mirror it to the audit table.

    The mirror row goes to ``desk_security_audit`` with username "agentic"
    and the event type prefixed by ``agent.``. A
    ``sqlite3.OperationalError`` on that second insert is swallowed, so
    the audit mirror is best effort.

    Keys in *payload* are merged over ``{"message": message}`` for the audit
    row, so a "message" key inside *payload* wins there.
    """
    extra = payload or {}
    action_row = (_now(), agent_id, run_id, event_type, message, json.dumps(extra))
    conn.execute(
        "INSERT INTO agent_action_log (ts,agent_id,run_id,event_type,message,payload_json) VALUES (?,?,?,?,?,?)",
        action_row,
    )
    try:
        audit_body = json.dumps({"message": message, **extra})
        conn.execute(
            "INSERT INTO desk_security_audit (username,event_type,client_ip,payload,created_at) VALUES (?,?,?,?,?)",
            ("agentic", f"agent.{event_type}", "vm122", audit_body, _now()),
        )
    except sqlite3.OperationalError:
        # Audit mirror is optional; the action log row above is kept.
        pass
def upsert_scenario(conn, scenario):
    """Insert or update a scenario row keyed by ``scenario["id"]``.

    The whole *scenario* mapping is stored as JSON in ``config_json``.
    Missing ``schedule`` / ``severity_default`` fall back to
    ``"*/5 * * * *"`` and ``"warn"``. New rows are inserted with
    ``enabled=1``; on conflict the ``enabled`` column is left untouched.
    """
    row_values = (
        scenario["id"],
        scenario["title"],
        scenario.get("schedule", "*/5 * * * *"),
        scenario.get("severity_default", "warn"),
        json.dumps(scenario),
        _now(),
    )
    conn.execute(
        """INSERT INTO agent_scenarios (id,title,schedule,severity_default,config_json,enabled,updated_at)
VALUES (?,?,?,?,?,1,?) ON CONFLICT(id) DO UPDATE SET title=excluded.title, schedule=excluded.schedule,
severity_default=excluded.severity_default, config_json=excluded.config_json, updated_at=excluded.updated_at""",
        row_values,
    )
def list_scenarios(conn):
    """Return all enabled scenarios ordered by id, with last-run info.

    Each item is the row as a dict where ``config_json`` is replaced by a
    decoded ``config`` value (``{}`` when the column is empty), plus
    ``last_run_status`` / ``last_run_at`` taken from the newest
    ``agent_runs`` row for that scenario (both ``None`` when it never ran).
    Rows must support access by column name.
    """
    latest_run_sql = "SELECT status,started_at FROM agent_runs WHERE scenario_id=? ORDER BY id DESC LIMIT 1"
    scenarios = []
    for record in conn.execute("SELECT * FROM agent_scenarios WHERE enabled=1 ORDER BY id"):
        entry = dict(record)
        raw_config = entry.pop("config_json")
        entry["config"] = json.loads(raw_config or "{}")
        latest = conn.execute(latest_run_sql, (record["id"],)).fetchone()
        if latest:
            entry["last_run_status"] = latest["status"]
            entry["last_run_at"] = latest["started_at"]
        else:
            entry["last_run_status"] = None
            entry["last_run_at"] = None
        scenarios.append(entry)
    return scenarios
def get_scenario(conn, scenario_id):
    """Return one enabled scenario as a dict, or ``None`` if absent/disabled.

    ``config_json`` is replaced by a decoded ``config`` value (``{}`` when
    the column is empty).
    """
    found = conn.execute(
        "SELECT * FROM agent_scenarios WHERE id=? AND enabled=1", (scenario_id,)
    ).fetchone()
    if found:
        scenario = dict(found)
        scenario["config"] = json.loads(scenario.pop("config_json") or "{}")
        return scenario
    return None
def create_run(conn, scenario_id, trigger):
    """Insert an ``agent_runs`` row in status "running" and return its id."""
    cursor = conn.execute(
        "INSERT INTO agent_runs (scenario_id,trigger,status,started_at) VALUES (?,?,?,?)",
        (scenario_id, trigger, "running", _now()),
    )
    return int(cursor.lastrowid)
def finish_run(conn, run_id, *, status, summary, llm_model=None, metadata=None):
    """Record the outcome of a run and stamp its ``finished_at`` time.

    *metadata* is stored as JSON; a falsy value is stored as ``{}``.
    """
    metadata_json = json.dumps(metadata or {})
    finished_at = _now()
    conn.execute(
        "UPDATE agent_runs SET status=?,summary_text=?,llm_model=?,metadata_json=?,finished_at=? WHERE id=?",
        (status, summary, llm_model, metadata_json, finished_at, run_id),
    )
def add_finding(conn, run_id, *, severity, category, title, detail_md="", evidence=None, human_action="", kb_refs=None):
    """Insert a finding for *run_id* and return the new row id.

    *evidence* and *kb_refs* are stored as JSON (``{}`` / ``[]`` when
    falsy); *human_action* goes to the ``suggested_human_action`` column.
    """
    evidence_json = json.dumps(evidence or {})
    kb_refs_json = json.dumps(kb_refs or [])
    cursor = conn.execute(
        """INSERT INTO agent_findings (run_id,severity,category,title,detail_md,evidence_json,
suggested_human_action,kb_refs_json,created_at) VALUES (?,?,?,?,?,?,?,?,?)""",
        (run_id, severity, category, title, detail_md, evidence_json, human_action, kb_refs_json, _now()),
    )
    return int(cursor.lastrowid)
def list_findings(conn, *, severity=None, limit=50, open_only=True):
    """Return findings as dicts, newest id first, capped at *limit*.

    *severity* (when truthy) filters on an exact severity value;
    *open_only* restricts the result to findings not yet acknowledged.
    """
    sql_parts = ["SELECT * FROM agent_findings WHERE 1=1"]
    args = []
    if severity:
        sql_parts.append(" AND severity=?")
        args.append(severity)
    if open_only:
        sql_parts.append(" AND acknowledged_at IS NULL")
    sql_parts.append(" ORDER BY id DESC LIMIT ?")
    args.append(limit)
    return [dict(found) for found in conn.execute("".join(sql_parts), args)]
def list_action_log(conn, limit=100):
    """Return up to *limit* action-log rows as dicts, newest id first."""
    cursor = conn.execute("SELECT * FROM agent_action_log ORDER BY id DESC LIMIT ?", (limit,))
    return list(map(dict, cursor))
def index_kb_file(conn, source_path, text, *, chunk_size=1200):
    """Replace the knowledge-base chunks stored for *source_path*.

    Existing chunks for the path are deleted, *text* is split into
    consecutive slices of at most *chunk_size* characters, each slice is
    inserted, and the transaction is committed.

    Args:
        conn: SQLite connection; statements go through ``_exec_retry``.
        source_path: Value stored in ``agent_kb_chunks.source_path``.
        text: Full document text. An empty string leaves the path with
            no chunks.
        chunk_size: Maximum characters per chunk (default 1200).

    Raises:
        ValueError: If *chunk_size* is not positive.

    If any statement fails, the pending transaction is rolled back before
    the error is re-raised, so a later commit on the same connection cannot
    persist a half-replaced index for this file.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    try:
        _exec_retry(conn, "DELETE FROM agent_kb_chunks WHERE source_path=?", (source_path,))
        indexed_at = _now()
        for start in range(0, len(text), chunk_size):
            _exec_retry(
                conn,
                "INSERT INTO agent_kb_chunks (source_path,chunk_text,indexed_at) VALUES (?,?,?)",
                (source_path, text[start:start + chunk_size], indexed_at),
            )
        conn.commit()
    except Exception:
        # Undo the DELETE and partial INSERTs so the old chunks survive.
        conn.rollback()
        raise
def search_kb(conn, query, limit=8):
    """Keyword search over all knowledge-base chunks.

    The query is split on whitespace; words of two characters or fewer are
    ignored. A chunk's score is the number of query terms found in it
    (case-insensitive substring match). Returns up to *limit* dicts with
    ``source`` and ``snippet`` (first 400 characters of the chunk), best
    score first; ties keep the order in which the rows were read.
    """
    terms = [word.strip().lower() for word in query.split() if len(word.strip()) > 2]
    if not terms:
        return []
    hits = []
    for record in conn.execute("SELECT source_path,chunk_text FROM agent_kb_chunks"):
        haystack = record["chunk_text"].lower()
        matches = sum(term in haystack for term in terms)
        if matches:
            hit = {"source": record["source_path"], "snippet": record["chunk_text"][:400]}
            hits.append((matches, hit))
    # sorted() is stable even with reverse=True, so ties keep read order.
    ranked = sorted(hits, key=lambda pair: pair[0], reverse=True)
    return [hit for _, hit in ranked[:limit]]
# Severity names mapped to an ordinal rank; higher means more severe.
SEVERITY_RANK = {
    name: rank for rank, name in enumerate(("info", "warn", "high", "critical"))
}


def _severity_max(a: str, b: str) -> str:
    """Return whichever of *a* / *b* ranks higher in ``SEVERITY_RANK``.

    Unknown severities rank as 0 (same as "info"); on a tie *a* is returned.
    """
    rank_a = SEVERITY_RANK.get(a, 0)
    rank_b = SEVERITY_RANK.get(b, 0)
    if rank_a < rank_b:
        return b
    return a
def _incident_row(conn, incident_id: int) -> dict | None:
row = conn.execute("SELECT * FROM agent_incidents WHERE id=?", (incident_id,)).fetchone()
return dict(row) if row else None
def _enrich_incident(conn, row: dict) -> dict:
    """Return a copy of *row* with an ``agent_name`` display field added.

    The name comes from the ``AGENT_CATALOG`` entry for ``primary_agent``
    when one exists; otherwise the raw ``primary_agent`` value is used, or
    "Vigia" when the row has no such key. *conn* is not used.
    """
    # Imported lazily, as in the rest of this module's catalog lookups.
    from app.agents.catalog import AGENT_CATALOG

    enriched = dict(row)
    profile = AGENT_CATALOG.get(enriched.get("primary_agent", ""))
    if profile:
        enriched["agent_name"] = profile.name
    else:
        enriched["agent_name"] = enriched.get("primary_agent", "Vigia")
    return enriched
def upsert_incident(
    conn,
    *,
    scenario_id: str,
    finding_id: int,
    title: str,
    severity: str,
    primary_agent: str,
    suggested_human_action: str = "",
) -> tuple[dict, bool]:
    """Create or update the single incident kept per scenario.

    Returns ``(incident, notify_operators)``. ``notify_operators`` is True
    when the incident is newly created, when an incident whose status was
    not "open" is reopened, or when the stored severity escalates.

    For an existing incident the occurrence counter is incremented, the
    latest finding / title / agent are overwritten, and severity becomes the
    higher of the stored and incoming values (it never decreases here). An
    empty *suggested_human_action* keeps the stored text. A message thread
    is created when the incident does not have one yet.
    """
    from app.agents import messages as agent_messages

    now = _now()
    existing = conn.execute(
        "SELECT * FROM agent_incidents WHERE scenario_id=?", (scenario_id,)
    ).fetchone()

    if not existing:
        # First occurrence: open a thread, insert the incident, always notify.
        thread_id = agent_messages.create_thread(
            conn, subject=title, primary_agent=primary_agent, severity=severity, related_finding_id=finding_id
        )
        cursor = conn.execute(
            """INSERT INTO agent_incidents
(scenario_id, primary_agent, severity, status, title, latest_finding_id,
occurrence_count, first_seen_at, last_seen_at, suggested_human_action, thread_id)
VALUES (?,?,?,?,?,?,1,?,?,?,?)""",
            (scenario_id, primary_agent, severity, "open", title, finding_id, now, now, suggested_human_action, thread_id),
        )
        created = _incident_row(conn, int(cursor.lastrowid))
        return _enrich_incident(conn, created or {}), True

    current = dict(existing)
    incident_id = current["id"]
    previous_severity = current["severity"]
    merged_severity = _severity_max(previous_severity, severity)
    reopened = current["status"] != "open"
    escalated = SEVERITY_RANK.get(merged_severity, 0) > SEVERITY_RANK.get(previous_severity, 0)
    notify = reopened or escalated

    if reopened:
        # Reopening also clears the previous acknowledgement.
        update_sql = """UPDATE agent_incidents SET occurrence_count=occurrence_count+1, last_seen_at=?,
latest_finding_id=?, severity=?, title=?, suggested_human_action=?,
primary_agent=?, status='open', acknowledged_at=NULL, acknowledged_by=NULL WHERE id=?"""
    else:
        update_sql = """UPDATE agent_incidents SET occurrence_count=occurrence_count+1, last_seen_at=?,
latest_finding_id=?, severity=?, title=?, suggested_human_action=?,
primary_agent=? WHERE id=?"""
    action_text = suggested_human_action or current.get("suggested_human_action")
    conn.execute(
        update_sql,
        (now, finding_id, merged_severity, title, action_text, primary_agent, incident_id),
    )

    refreshed = _incident_row(conn, incident_id)
    if refreshed and not refreshed.get("thread_id"):
        # Incident has no thread yet: create one now.
        thread_id = agent_messages.create_thread(
            conn, subject=title, primary_agent=primary_agent, severity=merged_severity, related_finding_id=finding_id
        )
        conn.execute("UPDATE agent_incidents SET thread_id=? WHERE id=?", (thread_id, incident_id))
        refreshed = _incident_row(conn, incident_id)
    return _enrich_incident(conn, refreshed or {}), notify
def list_incidents(
    conn,
    *,
    status: str = "open",
    severity: str | None = None,
    agent_id: str | None = None,
    limit: int = 50,
) -> list[dict]:
    """Return enriched incidents, most severe first, then most recently seen.

    *status* filters on an exact status unless it is "all"; *severity* and
    *agent_id* (matched against ``primary_agent``) are optional exact
    filters. At most *limit* rows are returned.
    """
    filters = []
    params: list[Any] = []
    if status != "all":
        filters.append(" AND status=?")
        params.append(status)
    if severity:
        filters.append(" AND severity=?")
        params.append(severity)
    if agent_id:
        filters.append(" AND primary_agent=?")
        params.append(agent_id)
    ordering = " ORDER BY CASE severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1 WHEN 'warn' THEN 2 ELSE 3 END, last_seen_at DESC LIMIT ?"
    params.append(limit)
    query = "SELECT * FROM agent_incidents WHERE 1=1" + "".join(filters) + ordering
    incidents = []
    for record in conn.execute(query, params):
        incidents.append(_enrich_incident(conn, dict(record)))
    return incidents
def get_incident(conn, incident_id: int) -> dict | None:
    """Return the enriched incident with the given id, or ``None`` if absent."""
    found = conn.execute(
        "SELECT * FROM agent_incidents WHERE id=?", (incident_id,)
    ).fetchone()
    if not found:
        return None
    return _enrich_incident(conn, dict(found))
def ack_incident(conn, incident_id: int, username: str) -> dict | None:
    """Acknowledge an incident on behalf of *username*.

    Sets the incident status to "ack" and stamps who/when. It also
    acknowledges the incident's latest finding (if still unacknowledged) and
    every unacknowledged ``requires_human`` message in its thread. Returns
    the refreshed incident, or ``None`` when the id does not exist. No
    commit is issued here.
    """
    incident = get_incident(conn, incident_id)
    if not incident:
        return None

    stamp = _now()
    conn.execute(
        "UPDATE agent_incidents SET status='ack', acknowledged_at=?, acknowledged_by=? WHERE id=?",
        (stamp, username, incident_id),
    )

    latest_finding = incident.get("latest_finding_id")
    if latest_finding:
        conn.execute(
            "UPDATE agent_findings SET acknowledged_at=?, acknowledged_by=? WHERE id=? AND acknowledged_at IS NULL",
            (stamp, username, latest_finding),
        )

    thread = incident.get("thread_id")
    if thread:
        conn.execute(
            """UPDATE agent_messages SET acknowledged_at=?, acknowledged_by=?
WHERE thread_id=? AND requires_human=1 AND acknowledged_at IS NULL""",
            (stamp, username, thread),
        )
    return get_incident(conn, incident_id)
def resolve_incidents_for_scenario(conn, scenario_id: str) -> None:
    """Mark the scenario's open incident as resolved once it is OK again.

    Only rows whose status is "open" are touched; ``last_seen_at`` is
    refreshed to the current time.
    """
    resolved_at = _now()
    conn.execute(
        "UPDATE agent_incidents SET status='resolved', last_seen_at=? WHERE scenario_id=? AND status='open'",
        (resolved_at, scenario_id),
    )
def _tick_reported_findings(payload) -> bool:
    """Return True when any run listed in a tick payload reported findings."""
    if not isinstance(payload, dict):
        return False
    runs = payload.get("runs")
    if not isinstance(runs, (list, tuple)):
        return False
    for run in runs:
        if not isinstance(run, dict):
            continue
        count = run.get("findings_count", 0)
        # Ignore malformed counts instead of failing the comparison.
        if isinstance(count, (int, float)) and count > 0:
            return True
    return False


def _worker_interval_sec(default: int = 600) -> int:
    """Read AGENTIC_INTERVAL_SEC as an int, using *default* if unset/invalid."""
    import os

    raw = os.getenv("AGENTIC_INTERVAL_SEC")
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


def get_overview(conn) -> dict:
    """Build the summary dict returned for the overview view.

    Returned keys:
      * ``tier``: "t1" when ``llm_client.AGENTIC_LLM_ENABLED`` is truthy,
        else "t0".
      * ``ollama`` / ``model``: taken from ``llm_client``.
      * ``last_tick_at``: timestamp of the newest ``tick.complete`` action
        log row, or ``None`` when there is none.
      * ``last_tick_status``: "degraded" when any run in that tick's payload
        has ``findings_count > 0``, otherwise "ok" (also "ok" when there is
        no tick or its payload cannot be read).
      * ``scenarios_total`` / ``scenarios_ok``: enabled scenarios, and how
        many have "ok" as their last run status.
      * ``incidents_open``: open incident counts per severity.
      * ``worker_interval_sec``: ``AGENTIC_INTERVAL_SEC`` from the
        environment, 600 when unset or not an integer.

    A malformed tick payload or environment value no longer raises; the
    overview falls back to the defaults above.
    """
    from app.agents import llm_client

    last_tick = conn.execute(
        "SELECT ts, message, payload_json FROM agent_action_log WHERE event_type='tick.complete' ORDER BY id DESC LIMIT 1"
    ).fetchone()
    open_counts = {
        r["severity"]: r["c"]
        for r in conn.execute(
            "SELECT severity, COUNT(*) c FROM agent_incidents WHERE status='open' GROUP BY severity"
        )
    }
    scenarios = list_scenarios(conn)
    ok_count = sum(1 for s in scenarios if s.get("last_run_status") == "ok")

    payload = {}
    if last_tick and last_tick["payload_json"]:
        try:
            payload = json.loads(last_tick["payload_json"])
        except json.JSONDecodeError:
            payload = {}

    return {
        "tier": "t1" if llm_client.AGENTIC_LLM_ENABLED else "t0",
        "ollama": llm_client.ollama_available(),
        "model": llm_client.AGENTIC_LLM_MODEL,
        "last_tick_at": last_tick["ts"] if last_tick else None,
        "last_tick_status": "degraded" if _tick_reported_findings(payload) else "ok",
        "scenarios_total": len(scenarios),
        "scenarios_ok": ok_count,
        "incidents_open": {
            level: open_counts.get(level, 0)
            for level in ("critical", "high", "warn", "info")
        },
        "worker_interval_sec": _worker_interval_sec(),
    }
def recent_runs_for_scenario(conn, scenario_id: str, limit: int = 12) -> list[dict]:
    """Return the newest runs of a scenario with their finding counts.

    Each dict has ``run_id``, ``status``, ``started_at``, ``summary_text``
    and ``findings_count``; rows are ordered by run id descending and capped
    at *limit*.
    """
    cursor = conn.execute(
        """SELECT id AS run_id, status, started_at, summary_text,
(SELECT COUNT(*) FROM agent_findings f WHERE f.run_id=agent_runs.id) AS findings_count
FROM agent_runs WHERE scenario_id=? ORDER BY id DESC LIMIT ?""",
        (scenario_id, limit),
    )
    return list(map(dict, cursor))
def backfill_incidents(conn) -> int:
    """Consolidate legacy open findings into ``agent_incidents``.

    Unacknowledged findings are read oldest first; for each scenario that
    has no incident yet, one incident is created from the first finding
    seen for it. Scenarios that already have an incident are skipped, so
    re-running is safe. Commits, then returns the number of incidents
    created.
    """
    # NOTE(review): the oldest open finding per scenario seeds the incident
    # (and becomes latest_finding_id) - confirm this is intended.
    open_findings = conn.execute(
        """SELECT f.id AS finding_id, f.severity, f.title, f.suggested_human_action, f.created_at,
r.scenario_id
FROM agent_findings f
JOIN agent_runs r ON r.id = f.run_id
WHERE f.acknowledged_at IS NULL
ORDER BY f.id ASC"""
    ).fetchall()
    handled: set[str] = set()
    created = 0
    for finding in open_findings:
        scenario = finding["scenario_id"]
        if scenario in handled:
            continue
        handled.add(scenario)
        already_there = conn.execute(
            "SELECT id FROM agent_incidents WHERE scenario_id=?", (scenario,)
        ).fetchone()
        if already_there:
            continue
        # Lazy import: only reached when an incident is actually created.
        from app.agents.catalog import SCENARIO_AGENT_MAP

        upsert_incident(
            conn,
            scenario_id=scenario,
            finding_id=finding["finding_id"],
            title=finding["title"],
            severity=finding["severity"],
            primary_agent=SCENARIO_AGENT_MAP.get(scenario, "sentinel"),
            suggested_human_action=finding["suggested_human_action"] or "",
        )
        created += 1
    conn.commit()
    return created