126 lines
4.4 KiB
Python
Executable file
126 lines
4.4 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Exporta transcript Cursor (.jsonl) → CHAT_BRUTO (.txt) + cópia multi-canal."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import shutil
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
|
|
def _extract_user_text(text: str) -> str:
|
|
text = re.sub(r"<timestamp>.*?</timestamp>\s*", "", text, flags=re.DOTALL)
|
|
m = re.search(r"<user_query>\s*(.*?)\s*</user_query>", text, flags=re.DOTALL)
|
|
if m:
|
|
return m.group(1).strip()
|
|
return text.strip()
|
|
|
|
|
|
def convert(jsonl_src: Path, txt_dst: Path, meta: dict) -> int:
    """Render a Cursor transcript (.jsonl) as a plain-text "CHAT BRUTO" file.

    Each non-blank line of ``jsonl_src`` is parsed as one JSON value. Rows
    whose ``role`` is ``user`` or ``assistant`` (case-insensitive) become a
    numbered section. Inside a section, ``text`` parts are written as plain
    text and ``tool_use`` parts as ``[TOOL: name]`` followed by the
    pretty-printed ``input``; any other part type is ignored.

    Malformed input is skipped instead of raising: lines that are not valid
    JSON, JSON values that are not objects, a missing or ``null``
    ``message``, parts that are not objects, and ``text`` values that are not
    strings. A ``content`` given as a bare string is treated as one text part.

    Args:
        jsonl_src: Transcript to read (decoded as UTF-8; undecodable bytes
            are replaced).
        txt_dst: Output file. Parent directories are created as needed and an
            existing file is overwritten.
        meta: Optional header fields ``title``, ``transcript_id``,
            ``project``, ``date`` and ``description``; missing keys fall back
            to defaults.

    Returns:
        The number of user/assistant messages written.
    """
    rule = "=" * 80
    separator = "─" * 80
    out: list[str] = [
        rule,
        f"CHAT BRUTO — {meta.get('title', txt_dst.stem)}",
        f"Transcript Cursor: {meta.get('transcript_id', '—')}",
        f"Projeto: {meta.get('project', 'ligbox-ops-platform')}",
        f"Gerado em: {meta.get('date', datetime.now().strftime('%Y-%m-%d'))}",
        meta.get("description", "Texto integral (user + assistant + ferramentas)."),
        rule,
        "",
    ]

    msg_no = 0
    raw_lines = jsonl_src.read_text(encoding="utf-8", errors="replace").splitlines()
    for raw in raw_lines:
        if not raw.strip():
            continue
        try:
            row = json.loads(raw)
        except json.JSONDecodeError:
            continue
        # A line can be valid JSON without being an object (list, string,
        # number); such a line carries no message.
        if not isinstance(row, dict):
            continue
        role = str(row.get("role") or "").upper()
        if role not in ("USER", "ASSISTANT"):
            continue

        msg_no += 1
        out.extend((separator, f"[{msg_no}] {role}", separator))

        message = row.get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if isinstance(content, str):
            # Bare-string content: handle it like a single text part rather
            # than iterating over its characters.
            content = [{"type": "text", "text": content}]
        elif not isinstance(content, list):
            content = []

        for part in content:
            if not isinstance(part, dict):
                continue
            ptype = part.get("type")
            if ptype == "text":
                text = part.get("text")
                if not isinstance(text, str):
                    continue
                if role == "USER":
                    text = _extract_user_text(text)
                # Everything from the first "[REDACTED]" marker onwards is
                # dropped; only the text before it is kept.
                text = text.partition("[REDACTED]")[0].strip()
                if text:
                    out.extend((text, ""))
            elif ptype == "tool_use":
                out.append("[TOOL: " + str(part.get("name", "unknown")) + "]")
                inp = part.get("input")
                if inp is not None:
                    out.append(json.dumps(inp, ensure_ascii=False, indent=2))
                out.append("")
        # Blank line closing the message section.
        out.append("")

    txt_dst.parent.mkdir(parents=True, exist_ok=True)
    # Trailing blank lines are collapsed into exactly one final newline.
    txt_dst.write_text("\n".join(out).rstrip() + "\n", encoding="utf-8")
    return msg_no
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: export one transcript and mirror it to every channel.

    Expects three positional arguments in ``sys.argv``: the source ``.jsonl``
    path, the base file name (without extension) used for the outputs, and
    the transcript id shown in the header.

    The text export is first written to
    ``<project_root>/chat-bruto/<base>.txt``, where ``project_root`` is the
    directory one level above this script's directory. That file and the
    original ``.jsonl`` are then copied into each channel directory. A channel
    that cannot be written is reported on stderr and does not prevent the
    remaining channels from being written.

    Returns:
        0 when every channel was written; 1 on a usage error, a missing
        source file, or when at least one channel failed.
    """
    if len(sys.argv) < 4:
        print(
            "Uso: export-chat-bruto-standalone.py <jsonl_src> <base_name> <transcript_id>",
            file=sys.stderr,
        )
        return 1

    jsonl_src = Path(sys.argv[1]).resolve()
    base = sys.argv[2]
    transcript_id = sys.argv[3]

    # Fail early with a readable message instead of a traceback from inside
    # convert(), and before creating any output directory.
    if not jsonl_src.is_file():
        print(f"Erro: arquivo de origem não encontrado: {jsonl_src}", file=sys.stderr)
        return 1

    project_root = Path(__file__).resolve().parents[1]

    channels = [
        project_root / "chat-bruto",
        project_root / "docs" / "anais-referencia" / "chat-bruto",
        Path("/root/ligbox-ops-platform-chat-bruto"),
        Path("/root/ligbox-ops-platform-chat-bruto/anais-referencia/chat-bruto"),
        project_root / "LAPTOP",
        Path("/root/obsidian-infra/ligbox-ops-platform/chat-bruto"),
        Path("/root/obsidian-infra/ligbox-ops-platform/LAPTOP"),
    ]

    meta = {
        "title": base,
        "transcript_id": transcript_id,
        "project": "ligbox-ops-platform / VM122 / Overview + DNS Cloudflare + UI",
        "date": datetime.now().strftime("%Y-%m-%d"),
        "description": (
            "Sessão Roger: menu lateral SVG, Overview clássico (modal tenant/domínio), "
            "Overview Home estilo Cloudflare, card DNS Cloudflare na linha de métricas. "
            "Texto integral (user + assistant + ferramentas). Sem resumos de síntese."
        ),
    }

    # The first channel doubles as the place where the export is generated.
    tmp_txt = project_root / "chat-bruto" / f"{base}.txt"
    count = convert(jsonl_src, tmp_txt, meta)

    failures = 0
    for ch in channels:
        try:
            ch.mkdir(parents=True, exist_ok=True)
            dst_txt = ch / f"{base}.txt"
            # Skip the copy when the destination is the generated file itself.
            if dst_txt.resolve() != tmp_txt.resolve():
                shutil.copy2(tmp_txt, dst_txt)
            dst_jsonl = ch / f"{base}.jsonl"
            if dst_jsonl.resolve() != jsonl_src.resolve():
                shutil.copy2(jsonl_src, dst_jsonl)
        except OSError as exc:
            # One unavailable mirror must not block the others.
            failures += 1
            print(f"Erro: canal {ch}: {exc}", file=sys.stderr)

    if failures:
        print(
            f"Erro: {failures} de {len(channels)} canais falharam "
            f"({count} mensagens → {base}.txt)",
            file=sys.stderr,
        )
        return 1

    print(f"OK: {count} mensagens → {base}.txt ({len(channels)} canais)")
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|