ligbox-ops-platform/export-chat-bruto-standalone.py
Ligbox Spec Hub 3a2c64834b Initial import: ligbox-ops-platform + specs + LAPTOP + obsidian merge (CT130)
Source: VM122 /opt + obsidian-infra + LAPTOP
Hub: CT130 spec-hub 10.10.10.130
2026-06-19 17:26:41 +00:00

126 lines
4.4 KiB
Python
Executable file

#!/usr/bin/env python3
"""Exporta transcript Cursor (.jsonl) → CHAT_BRUTO (.txt) + cópia multi-canal."""
from __future__ import annotations
import json
import re
import shutil
import sys
from datetime import datetime
from pathlib import Path
def _extract_user_text(text: str) -> str:
    """Return the user's own words from the raw text of a user message part.

    Every ``<timestamp>...</timestamp>`` element (and the whitespace that
    follows it) is removed first. If a ``<user_query>...</user_query>``
    element is then present, the stripped content of the first one is
    returned; otherwise the remaining text is returned, stripped.

    Args:
        text: Raw text of a ``"text"`` content part.

    Returns:
        The extracted text, or ``""`` when *text* is not a string.
    """
    if not isinstance(text, str):
        # A JSON ``null`` (or any other non-string) has nothing to extract;
        # returning "" avoids a TypeError from ``re.sub``.
        return ""
    # DOTALL lets ``.`` span newlines, so multi-line elements are matched too.
    cleaned = re.sub(r"<timestamp>.*?</timestamp>\s*", "", text, flags=re.DOTALL)
    query = re.search(
        r"<user_query>\s*(.*?)\s*</user_query>", cleaned, flags=re.DOTALL
    )
    if query:
        return query.group(1).strip()
    return cleaned.strip()
def _render_part(part: dict, role: str) -> list[str]:
    """Return the output lines for one content part (empty list if nothing to emit)."""
    ptype = part.get("type")
    if ptype == "text":
        text = part.get("text", "")
        if not isinstance(text, str):
            # e.g. JSON ``null``: nothing printable in this part.
            return []
        if role == "USER":
            text = _extract_user_text(text)
        if "[REDACTED]" in text:
            # Keep only what precedes the first "[REDACTED]" marker.
            text = text.split("[REDACTED]")[0].rstrip()
        text = text.strip()
        return [text, ""] if text else []
    if ptype == "tool_use":
        rendered = ["[TOOL: " + str(part.get("name", "unknown")) + "]"]
        inp = part.get("input")
        if inp is not None:
            rendered.append(json.dumps(inp, ensure_ascii=False, indent=2))
        rendered.append("")
        return rendered
    # Any other part type is not exported.
    return []


def convert(jsonl_src: Path, txt_dst: Path, meta: dict) -> int:
    """Render a ``.jsonl`` transcript as a plain-text CHAT BRUTO file.

    Each non-blank line of *jsonl_src* is parsed as JSON. Rows whose ``role``
    is ``user`` or ``assistant`` (case-insensitive) are numbered and written
    with their ``text`` and ``tool_use`` content parts; every other row is
    ignored. Lines that are not valid JSON, or that do not decode to an
    object, are skipped so that one bad line does not abort the export.

    Args:
        jsonl_src: Transcript to read (decoded as UTF-8, undecodable bytes
            replaced).
        txt_dst: Output text file; its parent directories are created.
        meta: Optional header fields: ``title``, ``transcript_id``,
            ``project``, ``date`` and ``description``.

    Returns:
        The number of user/assistant messages written.
    """
    lines = jsonl_src.read_text(encoding="utf-8", errors="replace").splitlines()
    header_rule = "=" * 80
    # The original built this rule with ``"" * 80``, which is always the empty
    # string, so no visible separator was ever written around message titles.
    message_rule = "-" * 80
    out: list[str] = [
        header_rule,
        f"CHAT BRUTO — {meta.get('title', txt_dst.stem)}",
        f"Transcript Cursor: {meta.get('transcript_id', '')}",
        f"Projeto: {meta.get('project', 'ligbox-ops-platform')}",
        f"Gerado em: {meta.get('date', datetime.now().strftime('%Y-%m-%d'))}",
        meta.get("description", "Texto integral (user + assistant + ferramentas)."),
        header_rule,
        "",
    ]
    msg_no = 0
    for raw in lines:
        if not raw.strip():
            continue
        try:
            row = json.loads(raw)
        except json.JSONDecodeError:
            continue
        if not isinstance(row, dict):
            # Valid JSON that is not an object (list, number, ...) is not a message.
            continue
        role = str(row.get("role") or "").upper()
        if role not in ("USER", "ASSISTANT"):
            continue
        msg_no += 1
        out.append(message_rule)
        out.append(f"[{msg_no}] {role}")
        out.append(message_rule)
        message = row.get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if isinstance(content, str):
            # Treat a bare string body as a single text part.
            content = [{"type": "text", "text": content}]
        elif not isinstance(content, list):
            content = []
        for part in content:
            if isinstance(part, dict):
                out.extend(_render_part(part, role))
        # Blank line between consecutive messages.
        out.append("")
    txt_dst.parent.mkdir(parents=True, exist_ok=True)
    # Trailing blank lines are collapsed into exactly one final newline.
    txt_dst.write_text("\n".join(out).rstrip() + "\n", encoding="utf-8")
    return msg_no
def main() -> int:
    """Command-line entry point: convert a transcript and copy it to every channel.

    Expects ``<jsonl_src> <base_name> <transcript_id>`` in ``sys.argv``.
    The transcript is converted to ``<base_name>.txt`` under the project's
    ``chat-bruto`` directory; that text file and the source ``.jsonl`` are
    then copied into each channel directory. Channels are handled
    independently, so a directory that cannot be written does not prevent
    the remaining ones from being updated.

    Returns:
        ``0`` when every channel was written; ``1`` on a usage error, a
        missing input file, or when at least one channel failed.
    """
    if len(sys.argv) < 4:
        print(
            "Uso: export-chat-bruto-standalone.py <jsonl_src> <base_name> <transcript_id>",
            file=sys.stderr,
        )
        return 1
    jsonl_src = Path(sys.argv[1]).resolve()
    base = sys.argv[2]
    transcript_id = sys.argv[3]
    if not jsonl_src.is_file():
        # Fail with a readable message before creating any directory.
        print(f"Erro: transcript não encontrado: {jsonl_src}", file=sys.stderr)
        return 1
    # Two levels up from this file: the parent of the directory holding the script.
    # NOTE(review): confirm this matches the intended project root for the
    # location where the script is actually installed.
    project_root = Path(__file__).resolve().parents[1]
    channels = [
        project_root / "chat-bruto",
        project_root / "docs" / "anais-referencia" / "chat-bruto",
        Path("/root/ligbox-ops-platform-chat-bruto"),
        Path("/root/ligbox-ops-platform-chat-bruto/anais-referencia/chat-bruto"),
        project_root / "LAPTOP",
        Path("/root/obsidian-infra/ligbox-ops-platform/chat-bruto"),
        Path("/root/obsidian-infra/ligbox-ops-platform/LAPTOP"),
    ]
    # NOTE(review): "project" and "description" are fixed texts written into
    # every export header, regardless of which transcript is converted.
    meta = {
        "title": base,
        "transcript_id": transcript_id,
        "project": "ligbox-ops-platform / VM122 / Overview + DNS Cloudflare + UI",
        "date": datetime.now().strftime("%Y-%m-%d"),
        "description": (
            "Sessão Roger: menu lateral SVG, Overview clássico (modal tenant/domínio), "
            "Overview Home estilo Cloudflare, card DNS Cloudflare na linha de métricas. "
            "Texto integral (user + assistant + ferramentas). Sem resumos de síntese."
        ),
    }
    tmp_txt = project_root / "chat-bruto" / f"{base}.txt"
    count = convert(jsonl_src, tmp_txt, meta)
    failed = 0
    for ch in channels:
        try:
            ch.mkdir(parents=True, exist_ok=True)
            dst_txt = ch / f"{base}.txt"
            # Skip the copy when source and destination are the same file.
            if dst_txt.resolve() != tmp_txt.resolve():
                shutil.copy2(tmp_txt, dst_txt)
            dst_jsonl = ch / f"{base}.jsonl"
            if dst_jsonl.resolve() != jsonl_src.resolve():
                shutil.copy2(jsonl_src, dst_jsonl)
        except OSError as exc:
            failed += 1
            print(f"AVISO: falha no canal {ch}: {exc}", file=sys.stderr)
    written = len(channels) - failed
    print(f"OK: {count} mensagens → {base}.txt ({written} canais)")
    return 1 if failed else 0
if __name__ == "__main__":
    # Script entry: main()'s return value becomes the process exit status.
    sys.exit(main())