#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path
from typing import Any
from urllib import request

# Longest title base (in characters) the deterministic fallback summary keeps
# before truncating with an ellipsis.
FALLBACK_TITLE_MAX = 72

# Runtime configuration, overridable through environment variables.
AGENTS_DB_API = os.getenv("AGENTS_DB_API", "http://100.116.176.16:8091")
PI_MODEL = os.getenv("PI_SESSION_MEMORY_MODEL", "gpt-5.4-mini")
PI_THINKING = os.getenv("PI_SESSION_MEMORY_THINKING", "off")
PI_BIN = os.getenv("PI_SESSION_MEMORY_PI_BIN")  # None when not set
MAX_CANDIDATES = int(os.getenv("PI_SESSION_MEMORY_MAX_CANDIDATES", "6"))
ACTIVE_IMPORTANCE_THRESHOLD = float(os.getenv("PI_SESSION_MEMORY_ACTIVE_IMPORTANCE_THRESHOLD", "0.92"))
ACTIVE_CONFIDENCE_THRESHOLD = float(os.getenv("PI_SESSION_MEMORY_ACTIVE_CONFIDENCE_THRESHOLD", "0.9"))

# Memory types that choose_status() may mark "active"; "episode" is absent, so
# episodes always land in the inbox.
DURABLE_TYPES = {"profile", "decision", "project", "artifact", "procedural", "semantic"}

# Case-insensitive patterns; is_noise_memory() rejects any candidate whose
# title/content matches one of them.
NOISE_PATTERNS = [
    re.compile(r"you are running one deterministic", re.IGNORECASE),
    re.compile(r"phase-machine pass", re.IGNORECASE),
    re.compile(r"subagent intake task_", re.IGNORECASE),
    re.compile(r"^task id:\s*task_", re.IGNORECASE),
    re.compile(r"^payload:\s*\{", re.IGNORECASE),
    re.compile(r'"route"\s*:\s*\{', re.IGNORECASE),
    re.compile(r"you are iteration ", re.IGNORECASE),
    re.compile(r"continue the bounded ralph-loop", re.IGNORECASE),
]


def resolve_pi_bin() -> str:
    """Return the first ``pi`` binary candidate whose path exists.

    Candidates are tried in this order: the ``PI_BIN`` override, the result of
    ``shutil.which("pi")``, ``~/.npm-global/bin/pi``, ``/usr/local/bin/pi`` and
    ``/usr/bin/pi``.  Unset or empty candidates are skipped.

    Raises:
        FileNotFoundError: If no candidate path exists.
    """
    search_order = (
        PI_BIN,
        shutil.which("pi"),
        str(Path.home() / ".npm-global" / "bin" / "pi"),
        "/usr/local/bin/pi",
        "/usr/bin/pi",
    )
    located = next((path for path in search_order if path and Path(path).exists()), None)
    if located is None:
        raise FileNotFoundError("pi")
    return located


def load_job(job_file: Path) -> dict[str, Any]:
    """Read ``job_file`` as UTF-8 text and return the decoded JSON value."""
    text = job_file.read_text(encoding="utf-8")
    return json.loads(text)


def save_job(job_file: Path, payload: dict[str, Any]) -> None:
    """Overwrite ``job_file`` with ``payload`` as 2-space-indented UTF-8 JSON.

    Non-ASCII characters are written verbatim and the file ends with a single
    newline.
    """
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    job_file.write_text(f"{serialized}\n", encoding="utf-8")


def call_pi(prompt: str) -> str:
    """Run the ``pi`` CLI once with ``prompt`` and return its stripped stdout.

    The command uses the ``openai-codex`` provider with the configured model
    and thinking level, and passes ``--no-tools`` and ``--no-session``.

    Raises:
        FileNotFoundError: If ``resolve_pi_bin`` cannot locate the binary.
        RuntimeError: If the process exits non-zero.  The message is stderr,
            else stdout, else a generic text containing the exit code.
    """
    argv = [resolve_pi_bin(), "-p"]
    argv += ["--provider", "openai-codex"]
    argv += ["--model", PI_MODEL]
    argv += ["--thinking", PI_THINKING]
    argv += ["--no-tools", "--no-session", prompt]

    completed = subprocess.run(argv, capture_output=True, text=True)
    if completed.returncode == 0:
        return completed.stdout.strip()

    # Prefer whichever stream actually carries a message.
    detail = completed.stderr.strip() or completed.stdout.strip() or f"pi failed ({completed.returncode})"
    raise RuntimeError(detail)


def fallback_analysis(job: dict[str, Any], error: Exception) -> dict[str, Any]:
    """Build an analysis result without the model, from session metadata only.

    Used when model analysis failed with ``error``.  If the session metadata
    lacks either a user text (``first_user_text``, falling back to
    ``last_user_text``) or a ``last_assistant_text``, the result carries no
    memories.  Otherwise it carries one ``episode`` / ``session_summary``
    memory titled ``"<cwd basename>: <first line of the user text>"``.

    Returns:
        A dict with ``memories`` (zero or one entry) and a ``reason`` string
        that embeds ``error``.
    """
    info = session_meta(job)
    closing_request = (info.get("last_user_text") or "").strip()
    closing_reply = (info.get("last_assistant_text") or "").strip()
    opening_request = (info.get("first_user_text") or closing_request or "").strip()
    if not (opening_request and closing_reply):
        return {"memories": [], "reason": f"fallback-no-summary:{error}"}

    # Title base: first line of the request, cut to FALLBACK_TITLE_MAX chars
    # (ellipsis included).
    headline = opening_request.splitlines()[0].strip()
    if len(headline) > FALLBACK_TITLE_MAX:
        headline = f"{headline[: FALLBACK_TITLE_MAX - 1].rstrip()}…"

    project = Path(info.get("cwd") or "").name or "session"
    session_ref = info.get("session_id") or Path(str(info.get("session_file") or "")).stem or "unknown"

    summary = {
        "type": "episode",
        "subtype": "session_summary",
        "scope": info.get("scope") or "project",
        "title": f"{project}: {headline}",
        "content": f"User asked: {opening_request}\nResult: {closing_reply}",
        "source_ref": f"session:{session_ref}",
        "evidence_ref": info.get("session_file"),
        "confidence": 0.72,
        "freshness": 0.95,
        "importance": 0.56,
        "reason": f"Deterministic fallback because model analysis was unavailable: {error}",
    }
    return {"memories": [summary], "reason": f"fallback-analysis:{error}"}


def ingest(memory: dict[str, Any]) -> dict[str, Any]:
    """POST ``memory`` as JSON to ``{AGENTS_DB_API}/api/ingest``.

    The request uses a 30-second timeout.

    Returns:
        The response body decoded as UTF-8 and parsed as JSON.
    """
    body = json.dumps(memory).encode()
    post = request.Request(
        f"{AGENTS_DB_API}/api/ingest",
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with request.urlopen(post, timeout=30) as response:
        reply = response.read().decode("utf-8")
    return json.loads(reply)


def extract_json(text: str) -> dict[str, Any]:
    """Parse the JSON object embedded in a model reply.

    Tolerates a surrounding Markdown code fence and prose before or after the
    object: the text between the first ``{`` and the last ``}`` is decoded.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the text cannot be decoded as JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass) or the
            decoded value is not a JSON object.
    """
    raw = text.strip()
    if raw.startswith("```"):
        raw = raw.strip("`")
        # Drop the fence's info-string line (e.g. "json"), but only when that
        # line does not itself hold the start of the object.
        head, *tail = raw.split("\n", 1)
        if tail and "{" not in head:
            raw = tail[0]
    start = raw.find("{")
    end = raw.rfind("}")
    if start >= 0 and end > start:
        raw = raw[start : end + 1]
    parsed = json.loads(raw)
    if not isinstance(parsed, dict):
        # Callers use dict methods on the result; reject lists/scalars here so
        # the failure surfaces as a parse error rather than an AttributeError.
        raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
    return parsed


def session_meta(job: dict[str, Any]) -> dict[str, Any]:
    """Return the dict holding session metadata for ``job``.

    That is ``job["payload"]`` when it is a dict; otherwise the job itself.
    """
    nested = job.get("payload")
    return nested if isinstance(nested, dict) else job


def build_prompt(job: dict[str, Any]) -> str:
    """Render the memory-extraction prompt for one session job.

    The prompt states the candidate limit (``MAX_CANDIDATES``), the extraction
    rules, the expected JSON schema and scoring guidance, then appends selected
    session metadata fields and the JSON-encoded ``branch`` excerpt.  The last
    user/assistant texts are cut to 2000 characters each and the branch JSON to
    30000 characters.
    """
    meta = session_meta(job)
    field = meta.get
    last_user = str(field("last_user_text") or "")[:2000]
    last_assistant = str(field("last_assistant_text") or "")[:2000]
    branch_json = json.dumps(field("branch") or [], ensure_ascii=False)[:30000]
    return f"""You are extracting reusable memory candidates from a Pi session.

Goal:
- Prefer broad candidate capture over premature rejection.
- Return 0 to {MAX_CANDIDATES} memories from this session.
- Include strong durable memories and weaker candidate memories that may be worth later consolidation.

Rules:
- Session metadata alone is not enough.
- Do NOT save operational notes about this session-memory system, dispatch flow, or local implementation work unless they affect broader workflows beyond this repo.
- Favor stable user preferences, enduring project facts, reusable external artifacts, decisions, procedures, blockers, and compact episode summaries with future reuse value.
- It is OK to include lower-confidence candidates if they could become useful after consolidation.
- Avoid trivial chatter, one-off execution noise, and duplicates of the same point unless they capture materially different memories.
- Prefer at most one memory per distinct insight.
- Output JSON only. No markdown. No extra text.

Return this schema:
{{
  "memories": [
    {{
      "type": "profile" | "decision" | "project" | "artifact" | "episode" | "procedural" | "semantic",
      "subtype": string | null,
      "scope": "global" | "project",
      "title": string,
      "content": string,
      "source_ref": string,
      "evidence_ref": string | null,
      "confidence": number,
      "freshness": number,
      "importance": number,
      "reason": string
    }}
  ],
  "reason": string
}}

Scoring guidance:
- confidence: 0 to 1, how well supported the memory is.
- freshness: 0 to 1, how current it is.
- importance: 0 to 1, how much future value it has.
- evidence_ref: cite the strongest source in the session or branch excerpt when possible.
- Use lower confidence/importance for tentative candidates rather than omitting them.

Session metadata:
- session_id: {field('session_id')}
- session_file: {field('session_file')}
- cwd: {field('cwd')}
- scope: {field('scope')}
- reason: {field('reason')}
- user_messages: {field('user_messages')}
- assistant_messages: {field('assistant_messages')}
- last_user_text: {last_user}
- last_assistant_text: {last_assistant}
- trigger: {field('trigger')}

Branch excerpt JSON:
{branch_json}
"""


def normalize_memories(analysis: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the candidate memory dicts contained in ``analysis``.

    When ``analysis["memories"]`` is a list, its dict entries are returned and
    everything else in it is dropped.  Otherwise a truthy ``save`` flag means
    the analysis dict itself is the single candidate; without it the result is
    empty.
    """
    listed = analysis.get("memories")
    if not isinstance(listed, list):
        return [analysis] if analysis.get("save") else []
    return [entry for entry in listed if isinstance(entry, dict)]


def choose_status(memory_type: str, confidence: float, importance: float) -> str:
    """Return ``"active"`` or ``"inbox"`` for a candidate memory.

    A memory is ``"active"`` only when its type is in ``DURABLE_TYPES`` and
    both scores reach their configured thresholds; everything else goes to
    ``"inbox"``.
    """
    if memory_type not in DURABLE_TYPES:
        return "inbox"
    meets_thresholds = (
        confidence >= ACTIVE_CONFIDENCE_THRESHOLD
        and importance >= ACTIVE_IMPORTANCE_THRESHOLD
    )
    return "active" if meets_thresholds else "inbox"


def is_noise_memory(candidate: dict[str, Any]) -> bool:
    """Return True when ``candidate`` looks like orchestration noise.

    A candidate is noise when any of the following holds:

    * its title and content are both empty;
    * the combined text matches one of ``NOISE_PATTERNS``;
    * the combined text mentions ``task_`` at least twice (case-insensitive);
    * the combined text contains both ``payload:`` and ``route``
      (case-insensitive);
    * at least 45% of the non-blank content lines look JSON-like.
    """
    title = str(candidate.get("title") or "")
    content = str(candidate.get("content") or "")
    combined = f"{title}\n{content}".strip()
    if not combined:
        return True
    if any(regex.search(combined) for regex in NOISE_PATTERNS):
        return True

    folded = combined.lower()
    if folded.count("task_") >= 2:
        return True
    if "payload:" in folded and "route" in folded:
        return True

    nonblank = [stripped for stripped in map(str.strip, content.splitlines()) if stripped]
    if not nonblank:
        return False
    # NOTE(review): any line containing a double quote counts as JSON-like, so
    # short prose content that quotes a phrase is also classified as noise.
    # Confirm this is intended.
    jsonish = sum(
        1
        for line in nonblank
        if line.startswith(("Payload:", "{", "}", '"')) or '"' in line
    )
    return jsonish / len(nonblank) >= 0.45


def _coerce_score(value: Any, default: float) -> float:
    """Return ``value`` as a finite float, or ``default`` when it is unusable.

    Unlike ``float(value or default)``, a legitimate score of ``0`` is kept,
    and non-numeric or non-finite values fall back to ``default`` instead of
    raising or producing NaN/Infinity (which are not valid JSON).
    """
    import math  # local import keeps this block self-contained

    if value is None:
        return default
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return number if math.isfinite(number) else default


def build_memory_payload(candidate: dict[str, Any], meta: dict[str, Any], job_file: Path, analysis_reason: str | None, index: int) -> dict[str, Any]:
    """Build the ingest payload for one candidate memory.

    Args:
        candidate: Memory candidate produced by the analysis.
        meta: Session metadata (see ``session_meta``).
        job_file: Path of the job file, recorded in the payload metadata.
        analysis_reason: Top-level ``reason`` of the analysis, if any.
        index: 1-based position of the candidate; used for the default title
            and stored as ``session_candidate_index``.

    Returns:
        The dict to send to the ingest endpoint.  ``status`` comes from
        ``choose_status``; missing or unusable scores default to 0.6
        (confidence), 0.5 (importance) and 0.9 (freshness).
    """
    # Coerce to str so an unhashable or non-string value from the model cannot
    # break the set membership test in choose_status() or later .strip() calls.
    memory_type = str(candidate.get("type") or "episode")
    confidence = _coerce_score(candidate.get("confidence"), 0.6)
    importance = _coerce_score(candidate.get("importance"), 0.5)
    status = choose_status(memory_type, confidence, importance)
    return {
        "type": memory_type,
        "subtype": candidate.get("subtype"),
        "scope": candidate.get("scope") or meta.get("scope") or "global",
        "status": status,
        "source_kind": "manual",
        "title": str(candidate.get("title") or f"Session insight {index}"),
        "content": str(candidate.get("content") or ""),
        "confidence": confidence,
        "freshness": _coerce_score(candidate.get("freshness"), 0.9),
        "importance": importance,
        "source_ref": candidate.get("source_ref") or f"pi-session:{meta.get('session_file') or meta.get('session_id') or 'unknown'}",
        "evidence_ref": candidate.get("evidence_ref"),
        "origin_agent": "pi-session-memory-analyzer",
        "metadata": {
            "job_file": str(job_file),
            "session_id": meta.get("session_id"),
            "session_file": meta.get("session_file"),
            "cwd": meta.get("cwd"),
            "reason": meta.get("reason"),
            "user_messages": meta.get("user_messages"),
            "assistant_messages": meta.get("assistant_messages"),
            "analysis_reason": analysis_reason,
            "candidate_reason": candidate.get("reason"),
            "trigger": meta.get("trigger"),
            # Anything not promoted straight to "active" is flagged as a
            # promotion candidate.
            "promotion_candidate": status == "inbox",
            "session_candidate_index": index,
        },
    }


def _saved_memory_ids(saved_items: list[Any]) -> list[Any]:
    """Collect the truthy ``id`` values from ingest responses, in order."""
    return [item["id"] for item in saved_items if isinstance(item, dict) and item.get("id")]


def main() -> int:
    """Analyze the session job given by ``--job-file`` and ingest its memories.

    Steps:
      1. Load the job, mark it ``running`` and write it back.
      2. Ask the model for memory candidates; on any error (including a reply
         that is not a JSON object) use ``fallback_analysis`` instead.
      3. Skip noise and empty candidates, ingest the rest.
      4. Record the outcome in ``job["analysis"]``, write the job file and
         print the analysis as JSON (stdout on success, stderr on failure).

    Returns:
        0 when the job completed, 1 when it failed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--job-file", required=True)
    args = parser.parse_args()

    job_file = Path(args.job_file)
    job = load_job(job_file)
    meta = session_meta(job)
    job["status"] = "running"
    job["analysis"] = {
        "status": "running",
        "save": None,
        "saved": None,
        "memory_id": None,
        "memory_ids": [],
        "saved_count": 0,
    }
    save_job(job_file, job)

    # Declared before the try so the failure handler can still report memories
    # that were ingested before an error occurred.
    saved_items: list[dict[str, Any]] = []
    try:
        fallback_error = None
        try:
            raw = call_pi(build_prompt(job))
            analysis = extract_json(raw)
            if not isinstance(analysis, dict):
                # A list/scalar reply would break the dict access below; treat
                # it as a failed analysis so the fallback runs.
                raise ValueError(f"model returned {type(analysis).__name__}, expected a JSON object")
        except Exception as exc:
            fallback_error = exc
            analysis = fallback_analysis(job, exc)
            raw = json.dumps(analysis, ensure_ascii=False)
        candidates = normalize_memories(analysis)[:MAX_CANDIDATES]

        for index, candidate in enumerate(candidates, start=1):
            if is_noise_memory(candidate):
                continue
            memory = build_memory_payload(candidate, meta, job_file, analysis.get("reason"), index)
            if not memory["content"].strip() or not memory["title"].strip():
                continue
            saved_items.append(ingest(memory))

        memory_ids = _saved_memory_ids(saved_items)
        job["analysis"]["raw"] = raw
        job["analysis"]["result"] = analysis
        job["analysis"]["status"] = "completed"
        job["status"] = "completed"
        if fallback_error is not None:
            job["analysis"]["fallback_error"] = str(fallback_error)
        job["analysis"]["save"] = bool(saved_items)
        job["analysis"]["saved"] = bool(saved_items)
        job["analysis"]["saved_count"] = len(saved_items)
        job["analysis"]["memory_ids"] = memory_ids
        job["analysis"]["memory_id"] = memory_ids[0] if memory_ids else None

        save_job(job_file, job)
        print(json.dumps(job["analysis"], ensure_ascii=False))
        return 0
    except Exception as exc:
        job["status"] = "failed"
        job["analysis"]["status"] = "failed"
        job["analysis"]["error"] = str(exc)
        # Memories ingested before the failure already exist remotely; keep
        # their ids in the job record so they are not lost (with nothing saved
        # these equal the initial values).
        memory_ids = _saved_memory_ids(saved_items)
        job["analysis"]["saved_count"] = len(saved_items)
        job["analysis"]["memory_ids"] = memory_ids
        job["analysis"]["memory_id"] = memory_ids[0] if memory_ids else None
        save_job(job_file, job)
        print(json.dumps(job["analysis"], ensure_ascii=False), file=sys.stderr)
        return 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())