Add agent_incidents dedup, overview/incidents/timeline API, mission board UI with fleet rail, kanban, context panel, mobile tabs, poll and keyboard shortcuts. Co-authored-by: Cursor <cursoragent@cursor.com>
366 lines
16 KiB
Python
366 lines
16 KiB
Python
"""Persistence Agentic Ops."""
|
|
from __future__ import annotations
|
|
import json, sqlite3, time
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
from app.agents.messages import init_messages_schema
|
|
|
|
def _now():
    """Return the current time in UTC as an ISO-8601 string (with offset)."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
|
|
def _exec_retry(conn, sql, params=(), *, attempts=8):
    """Execute *sql* on *conn*, retrying while SQLite reports a lock.

    Only ``sqlite3.OperationalError`` whose message contains "locked" is
    retried; any other error, or a lock error on the final attempt, is
    re-raised. The pause grows linearly: 0.25s, 0.5s, 0.75s, ...

    Returns the cursor from ``conn.execute``; returns ``None`` when
    ``attempts`` is not positive because nothing is executed.
    """
    final_try = attempts - 1
    tries = 0
    while tries < attempts:
        try:
            return conn.execute(sql, params)
        except sqlite3.OperationalError as err:
            retryable = "locked" in str(err).lower() and tries < final_try
            if not retryable:
                raise
        # Linear backoff before the next attempt.
        time.sleep(0.25 * (tries + 1))
        tries += 1
    return None
|
|
|
|
def init_agent_schema(conn):
    """Create the agentic-ops tables and indexes, then backfill incidents.

    Runs ``init_messages_schema`` first, then creates (if missing) the
    scenario, run, finding, action-log, KB-chunk and incident tables.
    When ``agent_incidents`` is empty, ``backfill_incidents`` is invoked to
    consolidate existing findings.

    The backfill is best-effort: a ``sqlite3.OperationalError`` during it is
    logged as a warning instead of being raised, so schema initialisation
    still succeeds.

    Note: ``executescript`` commits any pending transaction on *conn* before
    running the script (documented sqlite3 behaviour).
    """
    # Local import: the visible module header does not import logging.
    import logging

    init_messages_schema(conn)
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS agent_scenarios (
            id TEXT PRIMARY KEY, title TEXT NOT NULL, schedule TEXT,
            severity_default TEXT NOT NULL DEFAULT 'warn', config_json TEXT NOT NULL,
            enabled INTEGER NOT NULL DEFAULT 1, updated_at TEXT NOT NULL);
        CREATE TABLE IF NOT EXISTS agent_runs (
            id INTEGER PRIMARY KEY, scenario_id TEXT NOT NULL, trigger TEXT NOT NULL DEFAULT 'cron',
            status TEXT NOT NULL, summary_text TEXT, llm_model TEXT, metadata_json TEXT,
            started_at TEXT NOT NULL, finished_at TEXT);
        CREATE TABLE IF NOT EXISTS agent_findings (
            id INTEGER PRIMARY KEY, run_id INTEGER NOT NULL, severity TEXT NOT NULL,
            category TEXT NOT NULL DEFAULT 'api', title TEXT NOT NULL, detail_md TEXT,
            evidence_json TEXT, suggested_human_action TEXT, kb_refs_json TEXT,
            acknowledged_at TEXT, acknowledged_by TEXT, created_at TEXT NOT NULL);
        CREATE TABLE IF NOT EXISTS agent_action_log (
            id INTEGER PRIMARY KEY, ts TEXT NOT NULL, agent_id TEXT NOT NULL DEFAULT 'sentinel',
            run_id INTEGER, event_type TEXT NOT NULL, message TEXT, payload_json TEXT);
        CREATE TABLE IF NOT EXISTS agent_kb_chunks (
            id INTEGER PRIMARY KEY, source_path TEXT NOT NULL, chunk_text TEXT NOT NULL, indexed_at TEXT NOT NULL);
        CREATE INDEX IF NOT EXISTS idx_agent_runs_scenario ON agent_runs(scenario_id);
        CREATE TABLE IF NOT EXISTS agent_incidents (
            id INTEGER PRIMARY KEY,
            scenario_id TEXT NOT NULL UNIQUE,
            primary_agent TEXT NOT NULL,
            severity TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'open',
            title TEXT NOT NULL,
            latest_finding_id INTEGER,
            occurrence_count INTEGER NOT NULL DEFAULT 1,
            first_seen_at TEXT NOT NULL,
            last_seen_at TEXT NOT NULL,
            suggested_human_action TEXT,
            thread_id INTEGER,
            acknowledged_at TEXT,
            acknowledged_by TEXT);
        CREATE INDEX IF NOT EXISTS idx_agent_incidents_status ON agent_incidents(status);
    """)
    try:
        # Only backfill into an empty incidents table.
        if conn.execute("SELECT COUNT(*) FROM agent_incidents").fetchone()[0] == 0:
            backfill_incidents(conn)
    except sqlite3.OperationalError as exc:
        # Still best-effort, but no longer silent: a skipped backfill is
        # otherwise invisible to operators.
        logging.getLogger(__name__).warning("agent_incidents backfill skipped: %s", exc)
|
|
|
|
def log_event(conn, *, event_type, message, agent_id="sentinel", run_id=None, payload=None):
    """Append an event to ``agent_action_log`` and mirror it to the audit table.

    The mirror row goes to ``desk_security_audit`` with username "agentic",
    event type ``agent.<event_type>`` and client ip "vm122". A
    ``sqlite3.OperationalError`` from the mirror insert is ignored, so the
    action-log write stands on its own.

    Keys in *payload* are merged after ``message`` in the audit JSON, so a
    payload key named "message" overrides it there.
    """
    extra = payload or {}
    action_row = (_now(), agent_id, run_id, event_type, message, json.dumps(extra))
    conn.execute(
        "INSERT INTO agent_action_log (ts,agent_id,run_id,event_type,message,payload_json) VALUES (?,?,?,?,?,?)",
        action_row,
    )
    try:
        audit_body = json.dumps({"message": message, **extra})
        conn.execute(
            "INSERT INTO desk_security_audit (username,event_type,client_ip,payload,created_at) VALUES (?,?,?,?,?)",
            ("agentic", f"agent.{event_type}", "vm122", audit_body, _now()),
        )
    except sqlite3.OperationalError:
        # Audit mirroring is optional; the action log above is the record.
        pass
|
|
|
|
def upsert_scenario(conn, scenario):
    """Insert *scenario* or refresh it when its id already exists.

    *scenario* is a mapping with required keys ``id`` and ``title``;
    ``schedule`` defaults to ``*/5 * * * *`` and ``severity_default`` to
    ``warn``. The whole mapping is stored as JSON in ``config_json``.

    New rows are inserted enabled; on conflict the ``enabled`` flag is left
    as it is.
    """
    scenario_id = scenario["id"]
    title = scenario["title"]
    schedule = scenario.get("schedule", "*/5 * * * *")
    default_severity = scenario.get("severity_default", "warn")
    config_blob = json.dumps(scenario)
    values = (scenario_id, title, schedule, default_severity, config_blob, _now())
    conn.execute(
        """INSERT INTO agent_scenarios (id,title,schedule,severity_default,config_json,enabled,updated_at)
        VALUES (?,?,?,?,?,1,?) ON CONFLICT(id) DO UPDATE SET title=excluded.title, schedule=excluded.schedule,
        severity_default=excluded.severity_default, config_json=excluded.config_json, updated_at=excluded.updated_at""",
        values,
    )
|
|
|
|
def list_scenarios(conn):
    """Return enabled scenarios ordered by id, each with its latest run info.

    Every item is the scenario row as a dict with ``config_json`` replaced
    by the decoded ``config``, plus ``last_run_status`` and ``last_run_at``
    taken from the most recent ``agent_runs`` row (``None`` when the
    scenario has never run). Rows must support ``dict(row)`` and name-based
    indexing.
    """
    scenarios = conn.execute(
        "SELECT * FROM agent_scenarios WHERE enabled=1 ORDER BY id"
    ).fetchall()
    result = []
    for scenario in scenarios:
        entry = dict(scenario)
        raw_config = entry.pop("config_json")
        entry["config"] = json.loads(raw_config or "{}")
        latest = conn.execute(
            "SELECT status,started_at FROM agent_runs WHERE scenario_id=? ORDER BY id DESC LIMIT 1",
            (scenario["id"],),
        ).fetchone()
        if latest:
            entry["last_run_status"] = latest["status"]
            entry["last_run_at"] = latest["started_at"]
        else:
            entry["last_run_status"] = None
            entry["last_run_at"] = None
        result.append(entry)
    return result
|
|
|
|
def get_scenario(conn, scenario_id):
    """Return one enabled scenario as a dict, or ``None`` if not found.

    ``config_json`` is removed from the result and its decoded value is
    exposed under ``config`` (an empty stored value decodes to ``{}``).
    """
    found = conn.execute(
        "SELECT * FROM agent_scenarios WHERE id=? AND enabled=1", (scenario_id,)
    ).fetchone()
    if not found:
        return None
    scenario = dict(found)
    raw_config = scenario.pop("config_json")
    scenario["config"] = json.loads(raw_config or "{}")
    return scenario
|
|
|
|
def create_run(conn, scenario_id, trigger):
    """Insert a new run in status "running" and return its row id."""
    cursor = conn.execute(
        "INSERT INTO agent_runs (scenario_id,trigger,status,started_at) VALUES (?,?,?,?)",
        (scenario_id, trigger, "running", _now()),
    )
    return int(cursor.lastrowid)
|
|
|
|
def finish_run(conn, run_id, *, status, summary, llm_model=None, metadata=None):
    """Close run *run_id*: store status, summary, model, metadata and finish time.

    *metadata* is serialised as JSON; a missing or empty value is stored
    as ``{}``.
    """
    metadata_blob = json.dumps(metadata or {})
    finished_at = _now()
    conn.execute(
        "UPDATE agent_runs SET status=?,summary_text=?,llm_model=?,metadata_json=?,finished_at=? WHERE id=?",
        (status, summary, llm_model, metadata_blob, finished_at, run_id),
    )
|
|
|
|
def add_finding(conn, run_id, *, severity, category, title, detail_md="", evidence=None, human_action="", kb_refs=None):
    """Insert a finding for *run_id* and return the new finding id.

    *evidence* and *kb_refs* are stored as JSON (defaulting to ``{}`` and
    ``[]``); *human_action* is written to ``suggested_human_action``.
    """
    evidence_blob = json.dumps(evidence or {})
    kb_refs_blob = json.dumps(kb_refs or [])
    values = (
        run_id, severity, category, title, detail_md,
        evidence_blob, human_action, kb_refs_blob, _now(),
    )
    cursor = conn.execute(
        """INSERT INTO agent_findings (run_id,severity,category,title,detail_md,evidence_json,
        suggested_human_action,kb_refs_json,created_at) VALUES (?,?,?,?,?,?,?,?,?)""",
        values,
    )
    return int(cursor.lastrowid)
|
|
|
|
def list_findings(conn, *, severity=None, limit=50, open_only=True):
    """Return findings as dicts, newest first.

    Filters by *severity* when given, and by "not yet acknowledged" when
    *open_only* is true. At most *limit* rows are returned.
    """
    fragments = ["SELECT * FROM agent_findings WHERE 1=1"]
    bindings = []
    if severity:
        fragments.append(" AND severity=?")
        bindings.append(severity)
    if open_only:
        fragments.append(" AND acknowledged_at IS NULL")
    fragments.append(" ORDER BY id DESC LIMIT ?")
    bindings.append(limit)
    cursor = conn.execute("".join(fragments), bindings)
    return [dict(record) for record in cursor]
|
|
|
|
def list_action_log(conn, limit=100):
    """Return the newest *limit* action-log rows as dicts, newest first."""
    cursor = conn.execute(
        "SELECT * FROM agent_action_log ORDER BY id DESC LIMIT ?", (limit,)
    )
    entries = []
    for record in cursor:
        entries.append(dict(record))
    return entries
|
|
|
|
def index_kb_file(conn, source_path, text):
    """Replace the stored KB chunks of *source_path* with chunks of *text*.

    Existing chunks for the path are deleted, *text* is cut into consecutive
    1200-character pieces (no overlap), each piece is inserted with the same
    ``indexed_at`` timestamp, and the connection is committed. All
    statements go through ``_exec_retry``.
    """
    _exec_retry(conn, "DELETE FROM agent_kb_chunks WHERE source_path=?", (source_path,))
    stamp = _now()
    chunk_size = 1200
    pieces = (text[start:start + chunk_size] for start in range(0, len(text), chunk_size))
    for piece in pieces:
        _exec_retry(
            conn,
            "INSERT INTO agent_kb_chunks (source_path,chunk_text,indexed_at) VALUES (?,?,?)",
            (source_path, piece, stamp),
        )
    conn.commit()
|
|
|
|
def search_kb(conn, query, limit=8):
    """Return up to *limit* KB chunks matching *query*, best match first.

    The query is split on whitespace; words of two characters or fewer are
    ignored. A chunk scores one point per query word found in it
    (case-insensitive substring match); chunks scoring zero are dropped.
    Ties keep the order in which rows were read.

    Each hit is ``{"source": <source_path>, "snippet": <first 400 chars>}``.
    A ``None`` or empty query, or one with no usable words, yields ``[]``.
    """
    if not query:
        return []
    # str.split() already discards surrounding whitespace, so no strip needed.
    terms = [word.lower() for word in query.split() if len(word) > 2]
    if not terms:
        return []
    scored = []
    for row in conn.execute("SELECT source_path,chunk_text FROM agent_kb_chunks"):
        chunk = row["chunk_text"]
        # Lowercase once per chunk instead of once per search term.
        haystack = chunk.lower()
        score = sum(1 for term in terms if term in haystack)
        if score:
            scored.append((score, {"source": row["source_path"], "snippet": chunk[:400]}))
    # Stable sort: equal scores stay in read order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [hit for _, hit in scored[:limit]]
|
|
|
|
|
|
# Ordering of severities from least (0) to most (3) severe.
SEVERITY_RANK = {
    "info": 0,
    "warn": 1,
    "high": 2,
    "critical": 3,
}


def _severity_max(a: str, b: str) -> str:
    """Return whichever of *a* and *b* ranks higher in ``SEVERITY_RANK``.

    Unknown labels rank as 0; on a tie the first argument wins.
    """
    # max() returns the first maximal item, which preserves "a wins ties".
    return max((a, b), key=lambda label: SEVERITY_RANK.get(label, 0))
|
|
|
|
|
|
def _incident_row(conn, incident_id: int) -> dict | None:
    """Fetch one ``agent_incidents`` row by id as a plain dict, or ``None``."""
    cursor = conn.execute("SELECT * FROM agent_incidents WHERE id=?", (incident_id,))
    found = cursor.fetchone()
    if not found:
        return None
    return dict(found)
|
|
|
|
|
|
def _enrich_incident(conn, row: dict) -> dict:
    """Return a copy of *row* with an ``agent_name`` display field added.

    The name comes from the ``AGENT_CATALOG`` entry for ``primary_agent``;
    without a catalog entry the raw ``primary_agent`` value is used, or
    "Vigia" when the row has no such key. *conn* is not used here.
    """
    # Imported lazily, as in the rest of this module's catalog lookups.
    from app.agents.catalog import AGENT_CATALOG

    enriched = dict(row)
    profile = AGENT_CATALOG.get(enriched.get("primary_agent", ""))
    if profile:
        enriched["agent_name"] = profile.name
    else:
        enriched["agent_name"] = enriched.get("primary_agent", "Vigia")
    return enriched
|
|
|
|
|
|
def upsert_incident(
    conn,
    *,
    scenario_id: str,
    finding_id: int,
    title: str,
    severity: str,
    primary_agent: str,
    suggested_human_action: str = "",
) -> tuple[dict, bool]:
    """Create or update the single incident of *scenario_id*.

    Returns ``(incident, notify_operators)``. ``notify_operators`` is true
    when the incident is created, when an incident that was not "open" is
    reopened, or when the stored severity rises.

    On update the occurrence counter is incremented, the latest finding,
    title and agent are refreshed, and the severity only ever moves up
    (maximum of stored and new). An empty *suggested_human_action* keeps the
    stored one. A message thread is created when the incident has none.
    """
    from app.agents import messages as agent_messages

    now = _now()
    existing = conn.execute(
        "SELECT * FROM agent_incidents WHERE scenario_id=?", (scenario_id,)
    ).fetchone()

    if not existing:
        # First occurrence: open a thread, create the incident, always notify.
        thread_id = agent_messages.create_thread(
            conn, subject=title, primary_agent=primary_agent, severity=severity, related_finding_id=finding_id
        )
        cursor = conn.execute(
            """INSERT INTO agent_incidents
            (scenario_id, primary_agent, severity, status, title, latest_finding_id,
            occurrence_count, first_seen_at, last_seen_at, suggested_human_action, thread_id)
            VALUES (?,?,?,?,?,?,1,?,?,?,?)""",
            (scenario_id, primary_agent, severity, "open", title, finding_id, now, now, suggested_human_action, thread_id),
        )
        created = _incident_row(conn, int(cursor.lastrowid))
        return _enrich_incident(conn, created or {}), True

    current = dict(existing)
    incident_id = current["id"]
    prev_sev = current["severity"]
    new_sev = _severity_max(prev_sev, severity)
    reopened = current["status"] != "open"
    escalated = SEVERITY_RANK.get(new_sev, 0) > SEVERITY_RANK.get(prev_sev, 0)
    notify = reopened or escalated

    if reopened:
        # Reopening also clears the previous acknowledgement.
        update_sql = """UPDATE agent_incidents SET occurrence_count=occurrence_count+1, last_seen_at=?,
            latest_finding_id=?, severity=?, title=?, suggested_human_action=?,
            primary_agent=?, status='open', acknowledged_at=NULL, acknowledged_by=NULL WHERE id=?"""
    else:
        update_sql = """UPDATE agent_incidents SET occurrence_count=occurrence_count+1, last_seen_at=?,
            latest_finding_id=?, severity=?, title=?, suggested_human_action=?,
            primary_agent=? WHERE id=?"""
    action_text = suggested_human_action or current.get("suggested_human_action")
    conn.execute(
        update_sql,
        (now, finding_id, new_sev, title, action_text, primary_agent, incident_id),
    )

    refreshed = _incident_row(conn, incident_id)
    if refreshed and not refreshed.get("thread_id"):
        # Incident without a thread: attach one now.
        thread_id = agent_messages.create_thread(
            conn, subject=title, primary_agent=primary_agent, severity=new_sev, related_finding_id=finding_id
        )
        conn.execute("UPDATE agent_incidents SET thread_id=? WHERE id=?", (thread_id, incident_id))
        refreshed = _incident_row(conn, incident_id)
    return _enrich_incident(conn, refreshed or {}), notify
|
|
|
|
|
|
def list_incidents(
    conn,
    *,
    status: str = "open",
    severity: str | None = None,
    agent_id: str | None = None,
    limit: int = 50,
) -> list[dict]:
    """Return enriched incidents, most severe first, then most recently seen.

    *status* filters on the incident status unless it is ``"all"``;
    *severity* and *agent_id* (matched against ``primary_agent``) filter
    when given. At most *limit* incidents are returned.
    """
    # (apply?, SQL fragment, bound value) for each optional filter.
    filters = [
        (status != "all", " AND status=?", status),
        (bool(severity), " AND severity=?", severity),
        (bool(agent_id), " AND primary_agent=?", agent_id),
    ]
    fragments = ["SELECT * FROM agent_incidents WHERE 1=1"]
    params: list[Any] = []
    for wanted, fragment, value in filters:
        if wanted:
            fragments.append(fragment)
            params.append(value)
    fragments.append(
        " ORDER BY CASE severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1 WHEN 'warn' THEN 2 ELSE 3 END, last_seen_at DESC LIMIT ?"
    )
    params.append(limit)

    incidents = []
    for record in conn.execute("".join(fragments), params):
        incidents.append(_enrich_incident(conn, dict(record)))
    return incidents
|
|
|
|
|
|
def get_incident(conn, incident_id: int) -> dict | None:
    """Return the enriched incident with id *incident_id*, or ``None``."""
    found = _incident_row(conn, incident_id)
    if found is None:
        return None
    return _enrich_incident(conn, found)
|
|
|
|
|
|
def ack_incident(conn, incident_id: int, username: str) -> dict | None:
    """Acknowledge an incident on behalf of *username*.

    Sets the incident status to "ack" and, with the same timestamp, marks
    its latest finding (if still unacknowledged) and the unacknowledged
    ``requires_human`` messages of its thread as acknowledged.

    Returns the refreshed incident, or ``None`` when the id is unknown.
    """
    incident = get_incident(conn, incident_id)
    if not incident:
        return None

    stamp = _now()
    statements = [
        (
            "UPDATE agent_incidents SET status='ack', acknowledged_at=?, acknowledged_by=? WHERE id=?",
            (stamp, username, incident_id),
        )
    ]
    if incident.get("latest_finding_id"):
        statements.append(
            (
                "UPDATE agent_findings SET acknowledged_at=?, acknowledged_by=? WHERE id=? AND acknowledged_at IS NULL",
                (stamp, username, incident["latest_finding_id"]),
            )
        )
    if incident.get("thread_id"):
        statements.append(
            (
                """UPDATE agent_messages SET acknowledged_at=?, acknowledged_by=?
                WHERE thread_id=? AND requires_human=1 AND acknowledged_at IS NULL""",
                (stamp, username, incident["thread_id"]),
            )
        )
    for sql, bindings in statements:
        conn.execute(sql, bindings)
    return get_incident(conn, incident_id)
|
|
|
|
|
|
def resolve_incidents_for_scenario(conn, scenario_id: str) -> None:
    """Mark the scenario's open incident as resolved once it is OK again.

    Only rows in status "open" are touched; ``last_seen_at`` is set to the
    current time.
    """
    resolved_at = _now()
    bindings = (resolved_at, scenario_id)
    conn.execute(
        "UPDATE agent_incidents SET status='resolved', last_seen_at=? WHERE scenario_id=? AND status='open'",
        bindings,
    )
|
|
|
|
|
|
def get_overview(conn) -> dict:
    """Build the summary dict for the overview endpoint.

    Keys: ``tier`` ("t1" when the LLM is enabled, else "t0"), ``ollama``,
    ``model``, ``last_tick_at``, ``last_tick_status``, ``scenarios_total``,
    ``scenarios_ok``, ``incidents_open`` (counts per severity) and
    ``worker_interval_sec``.

    ``last_tick_status`` is "degraded" when the payload of the latest
    ``tick.complete`` log entry lists at least one run with a positive
    ``findings_count``, otherwise "ok".

    Malformed data degrades gracefully rather than failing the call: a
    payload that is not a JSON object with a ``runs`` list counts as "no
    runs", a non-numeric ``findings_count`` counts as zero, and an
    unparsable ``AGENTIC_INTERVAL_SEC`` falls back to 600.
    """
    import os

    from app.agents import llm_client

    default_interval = 600

    last_tick = conn.execute(
        "SELECT ts, message, payload_json FROM agent_action_log WHERE event_type='tick.complete' ORDER BY id DESC LIMIT 1"
    ).fetchone()
    open_counts = {r["severity"]: r["c"] for r in conn.execute(
        "SELECT severity, COUNT(*) c FROM agent_incidents WHERE status='open' GROUP BY severity"
    )}
    scenarios = list_scenarios(conn)
    ok_count = sum(1 for s in scenarios if s.get("last_run_status") == "ok")

    runs = []
    if last_tick and last_tick["payload_json"]:
        try:
            payload = json.loads(last_tick["payload_json"])
        except json.JSONDecodeError:
            payload = None
        # Valid JSON need not be an object; only trust a dict with a list.
        if isinstance(payload, dict) and isinstance(payload.get("runs"), list):
            runs = payload["runs"]

    def _has_findings(run) -> bool:
        # Ignore entries that are not dicts or whose count is not a number.
        if not isinstance(run, dict):
            return False
        count = run.get("findings_count", 0)
        return isinstance(count, (int, float)) and count > 0

    tick_status = "degraded" if any(_has_findings(r) for r in runs) else "ok"

    try:
        interval = int(os.getenv("AGENTIC_INTERVAL_SEC", str(default_interval)))
    except ValueError:
        # A mistyped env var must not take the overview down.
        interval = default_interval

    return {
        "tier": "t1" if llm_client.AGENTIC_LLM_ENABLED else "t0",
        "ollama": llm_client.ollama_available(),
        "model": llm_client.AGENTIC_LLM_MODEL,
        "last_tick_at": last_tick["ts"] if last_tick else None,
        "last_tick_status": tick_status,
        "scenarios_total": len(scenarios),
        "scenarios_ok": ok_count,
        "incidents_open": {
            "critical": open_counts.get("critical", 0),
            "high": open_counts.get("high", 0),
            "warn": open_counts.get("warn", 0),
            "info": open_counts.get("info", 0),
        },
        "worker_interval_sec": interval,
    }
|
|
|
|
|
|
def recent_runs_for_scenario(conn, scenario_id: str, limit: int = 12) -> list[dict]:
    """Return the newest *limit* runs of *scenario_id*, newest first.

    Each dict has ``run_id``, ``status``, ``started_at``, ``summary_text``
    and ``findings_count`` (number of findings recorded for that run).
    """
    cursor = conn.execute(
        """SELECT id AS run_id, status, started_at, summary_text,
        (SELECT COUNT(*) FROM agent_findings f WHERE f.run_id=agent_runs.id) AS findings_count
        FROM agent_runs WHERE scenario_id=? ORDER BY id DESC LIMIT ?""",
        (scenario_id, limit),
    )
    history = []
    for record in cursor:
        history.append(dict(record))
    return history
|
|
|
|
|
|
def backfill_incidents(conn) -> int:
    """Consolidate legacy open findings into ``agent_incidents``.

    For each scenario that has unacknowledged findings and no incident yet,
    one incident is created from that scenario's oldest such finding. The
    agent comes from ``SCENARIO_AGENT_MAP`` (default "sentinel"). Commits
    the connection and returns the number of incidents created.

    Safe to run repeatedly: scenarios that already have an incident are
    skipped.
    """
    rows = conn.execute(
        """SELECT f.id AS finding_id, f.severity, f.title, f.suggested_human_action, f.created_at,
        r.scenario_id
        FROM agent_findings f
        JOIN agent_runs r ON r.id = f.run_id
        WHERE f.acknowledged_at IS NULL
        ORDER BY f.id ASC"""
    ).fetchall()

    # Keep only the first (lowest id) finding of each scenario, in read order.
    first_by_scenario: dict = {}
    for row in rows:
        first_by_scenario.setdefault(row["scenario_id"], row)

    created = 0
    for sid, row in first_by_scenario.items():
        already = conn.execute("SELECT id FROM agent_incidents WHERE scenario_id=?", (sid,)).fetchone()
        if already:
            continue
        # Lazy import, executed only when an incident is actually created.
        from app.agents.catalog import SCENARIO_AGENT_MAP

        upsert_incident(
            conn,
            scenario_id=sid,
            finding_id=row["finding_id"],
            title=row["title"],
            severity=row["severity"],
            primary_agent=SCENARIO_AGENT_MAP.get(sid, "sentinel"),
            suggested_human_action=row["suggested_human_action"] or "",
        )
        created += 1
    conn.commit()
    return created
|