#!/usr/bin/env python3
"""process-sprint-data.py — Transform raw MCP/Jira JSON into a compact digest + processed tickets file.

Designed to be the bridge between Jira data (MCP or REST) and generate-sprint-report.py.
Performs parent/subtask rollup so parents always reflect their subtasks' code activity.

Usage:
    # From one or more MCP JSON response files (with "issues" key):
    python3 process-sprint-data.py page1.json page2.json --tickets-out processed.json

    # From pre-extracted issues array:
    python3 process-sprint-data.py issues.json --raw-issues --tickets-out processed.json

    # Pipe from stdin (e.g., after fetch-jira-sprint-issues.sh):
    cat issues.json | python3 process-sprint-data.py - --raw-issues --tickets-out processed.json

stdout: Sprint header, issue totals, `gh` commands for open-PR branches needing GitHub
enrichment, then a compact digest (one line per parent issue) for agent consumption.
--tickets-out: Processed parent-only tickets JSON for generate-sprint-report.py.
--sprint-out: Active sprint metadata (name, start, end, id) as JSON, when an active sprint is found.
"""

import argparse
import json
import re
import sys
from collections import defaultdict


def parse_dev_field(raw):
    """Parse customfield_10000 → { pr_count, pr_state, pr_open }.

    The field is scanned with regexes rather than decoded as JSON, so any text
    that contains ``"count":N``, ``"state":"X"`` and ``"open":true|false``
    fragments is accepted. The first occurrence of each fragment wins.

    Args:
        raw: The field value. Usually a string; a non-string value (for
            example an already-decoded dict) is serialized to JSON text first.

    Returns:
        A dict with ``pr_count`` (int, 0 when absent), ``pr_state`` (str or
        None) and ``pr_open`` (bool or None).
    """
    if not raw or raw == "{}":
        return {"pr_count": 0, "pr_state": None, "pr_open": None}

    if isinstance(raw, str):
        text = raw
    else:
        # str() of a dict uses single quotes and would never match the
        # double-quoted patterns below, so serialize it as JSON instead.
        try:
            text = json.dumps(raw)
        except (TypeError, ValueError):
            text = str(raw)

    # Whitespace around ':' is allowed so pretty-printed JSON parses the same
    # as the compact form.
    m_count = re.search(r'"count"\s*:\s*(\d+)', text)
    m_state = re.search(r'"state"\s*:\s*"(\w+)"', text)
    m_open = re.search(r'"open"\s*:\s*(true|false)', text)
    return {
        "pr_count": int(m_count.group(1)) if m_count else 0,
        "pr_state": m_state.group(1) if m_state else None,
        "pr_open": (m_open.group(1) == "true") if m_open else None,
    }


def parse_branch_field(raw):
    """Turn a customfield_10097 GitHub URL into a branch or pull-request descriptor.

    Returns ``{repo, branch}`` for a HumandDev ``/tree/<branch>`` URL,
    ``{repo, branch: None, pr_number, pr_url}`` for a HumandDev
    ``/pull/<number>`` URL, and None for an empty value or any other URL.
    """
    if not raw:
        return None

    tree_url = re.match(r"https://github\.com/HumandDev/([^/]+)/tree/(.+)", raw)
    if tree_url is not None:
        repo_name, branch_name = tree_url.groups()
        return {"repo": repo_name, "branch": branch_name}

    pull_url = re.match(r"https://github\.com/HumandDev/([^/]+)/pull/(\d+)", raw)
    if pull_url is None:
        return None

    repo_name, number = pull_url.groups()
    # The original URL is kept verbatim so it can be written back unchanged.
    return {"repo": repo_name, "branch": None, "pr_number": int(number), "pr_url": raw}


def extract_sprint_info(issues):
    """Return metadata for the first active sprint found in customfield_10020.

    Issues are scanned in order; within an issue the sprint entries are
    scanned in order and non-dict entries are ignored. The result holds
    ``name``, ``start`` and ``end`` (first 10 characters of the date strings,
    or "" when missing) and ``id``. Returns None when no active sprint exists.
    """
    for issue in issues:
        sprint_entries = issue.get("fields", {}).get("customfield_10020") or []
        active = next(
            (
                entry
                for entry in sprint_entries
                if isinstance(entry, dict) and entry.get("state") == "active"
            ),
            None,
        )
        if active is None:
            continue

        start_raw = active.get("startDate") or ""
        end_raw = active.get("endDate") or ""
        return {
            "name": active.get("name", "Unknown Sprint"),
            "start": start_raw[:10],
            "end": end_raw[:10],
            "id": active.get("id"),
        }
    return None


def extract_issue(issue):
    """Flatten a raw Jira issue into the dict shape used by the rest of this script.

    Nested objects (issue type, status, priority, assignee, parent) are reduced
    to the single values needed downstream; the dev and branch custom fields
    are parsed with parse_dev_field and parse_branch_field.
    """
    fields = issue.get("fields", {})
    issue_type = fields.get("issuetype", {})
    status = fields.get("status", {})
    category = status.get("statusCategory", {})

    # These objects may be present but null, hence `or {}` rather than a default.
    priority = fields.get("priority") or {}
    assignee = fields.get("assignee") or {}
    parent = fields.get("parent") or {}

    is_flagged = bool(fields.get("customfield_10021") or fields.get("flagged"))

    flat = {"key": issue["key"], "id": issue.get("id")}
    flat["summary"] = fields.get("summary", "")
    flat["type"] = issue_type.get("name", "?")
    flat["subtask"] = issue_type.get("subtask", False)
    flat["hierarchy_level"] = issue_type.get("hierarchyLevel", 0)
    flat["status"] = status.get("name", "?")
    flat["status_cat"] = category.get("name", "?")
    flat["priority"] = priority.get("name", "Medium")
    flat["assignee"] = assignee.get("displayName")
    flat["flagged"] = is_flagged
    flat["points"] = fields.get("customfield_10028")
    flat["dev"] = parse_dev_field(fields.get("customfield_10000", ""))
    flat["branch"] = parse_branch_field(fields.get("customfield_10097"))
    flat["parent_key"] = parent.get("key")
    flat["subtasks"] = fields.get("subtasks")
    return flat


def detect_platform_from_title(summary):
    """Map a Dev Task title prefix to its platform name.

    Leading and trailing whitespace is ignored; the prefix match is
    case-sensitive. Returns "web", "backend" or "mobile", or None when the
    title carries none of the known prefixes.
    """
    platform_by_prefix = {
        "Web |": "web",
        "Backend |": "backend",
        "Mobile |": "mobile",
        "Admin |": "web",
        "BE |": "backend",
    }
    title = summary.strip()
    return next(
        (
            platform
            for prefix, platform in platform_by_prefix.items()
            if title.startswith(prefix)
        ),
        None,
    )


def infer_parent_key(subtask, parent_keys_set, parent_title_map):
    """Infer parent key for a subtask when the 'parent' field is missing.

    Strategy: strip a known platform prefix from the subtask title and find a
    parent whose title is contained in (or contains) the remainder. When
    several parents match, the one with the longest title wins; on a tie the
    first one in ``parent_title_map`` is kept.

    Args:
        subtask: Flat issue dict; only ``summary`` is read.
        parent_keys_set: Unused; kept so existing callers keep working.
        parent_title_map: Mapping of parent key → parent summary.

    Returns:
        The matching parent key, or None when nothing matches or the subtask
        has no usable title.
    """
    title = (subtask["summary"] or "").strip()
    remainder = title
    for prefix in ("Web | ", "Backend | ", "Mobile | ", "Admin | ", "BE | "):
        if title.startswith(prefix):
            remainder = title[len(prefix):]
            break
    # Extra spaces after the prefix must not block a substring match.
    remainder = remainder.strip()

    # An empty string is a substring of every title, so a blank subtask title
    # would otherwise be attached to whichever parent has the longest title.
    if not remainder:
        return None

    best_match = None
    best_len = 0
    for pkey, ptitle in parent_title_map.items():
        # Equality is already covered by the substring checks.
        if ptitle in remainder or remainder in ptitle:
            if len(ptitle) > best_len:
                best_match = pkey
                best_len = len(ptitle)
    return best_match


def rollup_subtasks(parents, subtasks):
    """Attach each subtask to a parent and fold subtask data into the parent dicts.

    A subtask is grouped under its ``parent_key`` when that key belongs to one
    of ``parents``; otherwise infer_parent_key is asked to guess from titles.
    Subtasks that still have no parent are counted and reported on stderr.

    Parents that received at least one subtask are mutated in place:
      * ``assignees_rolled``: sorted names of the parent and subtask assignees.
      * ``dev``: replaced by a subtask-derived OPEN/MERGED summary, but only
        when the parent itself has no PR data.
      * ``_rolled_repos``: repos seen on subtask branches.
      * ``_platforms``: sorted platforms detected from subtask titles.
      * ``_open_pr_branches``: one entry per subtask with an OPEN PR and a branch.
      * ``_subtask_summary``: counts by status category.

    Returns the mapping of parent key → list of subtasks.
    """
    known_keys = {parent["key"] for parent in parents}
    titles_by_key = {parent["key"]: parent["summary"] for parent in parents}

    children = defaultdict(list)
    orphan_count = 0
    for sub in subtasks:
        owner = sub.get("parent_key")
        if not (owner and owner in known_keys):
            owner = infer_parent_key(sub, known_keys, titles_by_key)
        if not owner:
            orphan_count += 1
            continue
        children[owner].append(sub)

    if orphan_count > 0:
        print(f"WARNING: {orphan_count} subtask(s) could not be matched to a parent", file=sys.stderr)

    for parent in parents:
        # .get avoids creating empty entries in the defaultdict.
        subs = children.get(parent["key"], [])
        if not subs:
            continue

        names = {sub["assignee"] for sub in subs if sub["assignee"]}
        if parent["assignee"]:
            names.add(parent["assignee"])
        parent["assignees_rolled"] = sorted(names)

        own_dev = parent["dev"]
        if own_dev["pr_count"] == 0 and own_dev["pr_state"] is None:
            merged_total = sum(
                sub["dev"]["pr_count"] for sub in subs if sub["dev"]["pr_state"] == "MERGED"
            )
            open_total = sum(
                sub["dev"]["pr_count"] for sub in subs if sub["dev"]["pr_state"] == "OPEN"
            )
            # Any open PR makes the whole parent OPEN; MERGED needs no open PRs.
            if open_total > 0:
                parent["dev"] = {"pr_count": merged_total + open_total, "pr_state": "OPEN", "pr_open": True}
            elif merged_total > 0 and open_total == 0:
                parent["dev"] = {"pr_count": merged_total, "pr_state": "MERGED", "pr_open": False}
            parent["_rolled_repos"] = sorted({sub["branch"]["repo"] for sub in subs if sub["branch"]})

        if not parent["branch"]:
            for sub in subs:
                if not sub["branch"]:
                    continue
                # The key is only created once a subtask actually has a branch.
                rolled = parent.setdefault("_rolled_repos", [])
                repo_name = sub["branch"]["repo"]
                if repo_name not in rolled:
                    rolled.append(repo_name)

        detected = (detect_platform_from_title(sub["summary"]) for sub in subs)
        parent["_platforms"] = sorted({platform for platform in detected if platform})

        parent["_open_pr_branches"] = [
            {
                "subtask_key": sub["key"],
                "repo": sub["branch"]["repo"],
                "branch": sub["branch"].get("branch"),
            }
            for sub in subs
            if sub["dev"]["pr_state"] == "OPEN" and sub["branch"]
        ]

        categories = [sub["status_cat"] for sub in subs]
        parent["_subtask_summary"] = {
            "total": len(subs),
            "done": categories.count("Done"),
            "in_progress": categories.count("In Progress"),
            "to_do": categories.count("To Do"),
        }

    return children


def format_digest_line(p):
    """Render one parent issue as a compact pipe-delimited digest line.

    Columns, in order: key, type, status category, priority, assignees, dev
    state:count, repos, platforms, open branches, flags, points, subtask
    progress, summary (first 75 characters). Missing assignees, repos and
    platforms are shown as "—"; the other optional columns are left empty.

    Args:
        p: Flat parent dict, optionally carrying the rolled-up keys
            (``assignees_rolled``, ``_rolled_repos``, ``_platforms``,
            ``_open_pr_branches``, ``_subtask_summary``).

    Returns:
        The digest line as a string, without a trailing newline.
    """
    own_assignee = [p["assignee"]] if p["assignee"] else []
    assignees = ", ".join(p.get("assignees_rolled", own_assignee))
    dev = p["dev"]
    dev_str = f"{dev['pr_state'] or '—'}:{dev['pr_count']}"
    repos = ",".join(p.get("_rolled_repos", []))
    if not repos and p["branch"]:
        repos = p["branch"]["repo"]
    platforms = ",".join(p.get("_platforms", []))
    # The "branch" key is always present but is None for subtasks linked by PR
    # URL, so a .get() default would never apply; `or` gives the placeholder.
    open_branches = "; ".join(
        f"{ob['subtask_key']}:{ob['repo']}:{ob.get('branch') or '?'}"
        for ob in p.get("_open_pr_branches", [])
    )
    flags = []
    if p["flagged"]:
        flags.append("FLAGGED")
    ss = p.get("_subtask_summary")
    sub_info = ""
    if ss:
        sub_info = f"subs:{ss['done']}/{ss['total']}done"

    return "|".join([
        p["key"], p["type"], p["status_cat"], p["priority"],
        assignees or "—", dev_str, repos or "—", platforms or "—",
        open_branches or "", " ".join(flags), str(p.get("points") or ""),
        sub_info, p["summary"][:75],
    ])


def to_tickets_json(parents):
    """Convert parents to the shape expected by generate-sprint-report.py.

    Each flat parent dict is re-wrapped as a Jira-style issue
    (``key``/``id``/``fields``). Rolled-up data is folded back into the
    original custom fields:
      * ``assignee``: the rolled assignee names joined with ", ", else the
        parent's own assignee, else None.
      * ``customfield_10000``: compact JSON text with the PR count/state/open
        flag, or "{}" when there is no PR data.
      * ``customfield_10097``: the parent's own PR or branch URL; when the
        parent has no branch, the first open-PR subtask entry that has a
        branch name is used.

    Args:
        parents: Flat parent dicts, after any subtask rollup.

    Returns:
        A list of issue dicts, one per parent, in input order.
    """
    issues = []
    for p in parents:
        rolled_names = p.get("assignees_rolled")
        assignee_name = ", ".join(rolled_names) if rolled_names else p.get("assignee")

        dev_raw = "{}"
        d = p["dev"]
        if d["pr_count"] > 0 and d["pr_state"]:
            overall = {
                "count": d["pr_count"],
                "state": d["pr_state"],
                "open": bool(d["pr_open"]),
            }
            # Compact separators keep the text byte-identical to the usual
            # `"count":N` form; json.dumps adds proper escaping of the state.
            dev_raw = json.dumps(
                {"cachedValue": {"summary": {"pullrequest": {"overall": overall}}}},
                separators=(",", ":"),
                ensure_ascii=False,
            )

        branch_raw = None
        own_branch = p["branch"]
        if own_branch:
            if own_branch.get("pr_url"):
                branch_raw = own_branch["pr_url"]
            elif own_branch.get("branch"):
                branch_raw = f"https://github.com/HumandDev/{own_branch['repo']}/tree/{own_branch['branch']}"
        else:
            # Entries from PR-URL subtasks carry branch=None; skip those so a
            # later entry with a real branch name is not lost.
            usable = next(
                (ob for ob in (p.get("_open_pr_branches") or []) if ob.get("branch")),
                None,
            )
            if usable:
                branch_raw = f"https://github.com/HumandDev/{usable['repo']}/tree/{usable['branch']}"

        issue = {
            "key": p["key"],
            "id": p.get("id"),
            "fields": {
                "summary": p["summary"],
                "issuetype": {"name": p["type"], "subtask": False},
                "status": {
                    "name": p["status"],
                    "statusCategory": {"name": p["status_cat"]},
                },
                "priority": {"name": p["priority"]},
                "assignee": {"displayName": assignee_name} if assignee_name else None,
                "customfield_10021": True if p["flagged"] else None,
                "customfield_10028": p.get("points"),
                "customfield_10000": dev_raw,
                "customfield_10097": branch_raw,
            },
        }
        issues.append(issue)
    return issues


def main():
    """CLI entry point: load Jira issue JSON, roll up subtasks, and emit the digest.

    Reads every input file (``-`` means stdin), merges their issues, and prints
    to stdout: the sprint header, issue totals, `gh` commands for open-PR
    branches, the per-parent digest, and a backfill hint for parents that have
    no subtasks in the data. With ``--tickets-out`` the processed parent
    tickets are written as JSON; with ``--sprint-out`` the active sprint
    metadata is written as JSON. Status messages go to stderr.
    """
    parser = argparse.ArgumentParser(description="Process Jira MCP JSON into compact digest + tickets file")
    parser.add_argument("files", nargs="+", help="MCP JSON response files, or '-' for stdin")
    parser.add_argument("--raw-issues", action="store_true",
                        help="Input is a flat issues array (not MCP wrapper with 'issues' key)")
    parser.add_argument("--tickets-out", default=None,
                        help="Write processed parent-only tickets JSON to this file")
    parser.add_argument("--sprint-out", default=None,
                        help="Write sprint metadata JSON to this file")

    args = parser.parse_args()

    all_issues = []
    for path in args.files:
        if path == "-":
            data = json.load(sys.stdin)
        else:
            # Explicit UTF-8 so parsing does not depend on the platform locale.
            with open(path, encoding="utf-8") as f:
                data = json.load(f)

        if args.raw_issues:
            if isinstance(data, list):
                all_issues.extend(data)
            else:
                # A dict without "issues" is treated as a single issue.
                all_issues.extend(data.get("issues", [data]))
        else:
            all_issues.extend(data.get("issues", []))

    sprint = extract_sprint_info(all_issues)
    extracted = [extract_issue(i) for i in all_issues]
    parents = [e for e in extracted if not e["subtask"]]
    subtasks = [e for e in extracted if e["subtask"]]

    children_map = rollup_subtasks(parents, subtasks)

    if sprint:
        print(f"SPRINT: {sprint['name']} | {sprint['start']} — {sprint['end']}")
    else:
        print("SPRINT: (not found in data — customfield_10020 may be missing from fields)")
    print(f"TOTAL: {len(all_issues)} issues ({len(parents)} parent, {len(subtasks)} subtasks)")

    need_gh = [
        ob
        for p in parents
        for ob in p.get("_open_pr_branches", [])
    ]
    if need_gh:
        print(f"\nGITHUB_ENRICH: {len(need_gh)} open-PR branches need review status:")
        for ob in need_gh:
            # "branch" is always present but may be None (subtask linked by PR
            # URL), so fall back with `or` instead of a .get() default.
            print(f"  gh pr list --repo HumandDev/{ob['repo']} --search \"head:{ob.get('branch') or '?'}\" --state open --limit 5 --json number,url,isDraft,reviewDecision,statusCheckRollup")
    else:
        print("\nGITHUB_ENRICH: none needed (no open PRs detected from Jira dev fields)")

    print(f"\n{'—' * 120}")
    print("KEY|TYPE|STATUS_CAT|PRIORITY|ASSIGNEES|DEV|REPOS|PLATFORMS|OPEN_BRANCHES|FLAGS|POINTS|SUBTASKS|SUMMARY")
    print(f"{'—' * 120}")
    for p in parents:
        print(format_digest_line(p))

    # Build the lookup once instead of once per parent: a parent is covered
    # when a subtask names it explicitly or the rollup matched one to it.
    covered_keys = {st.get("parent_key") for st in subtasks}
    covered_keys.update(pk for pk, subs in children_map.items() if subs)
    parents_needing_subtasks = [p["key"] for p in parents if p["key"] not in covered_keys]
    if parents_needing_subtasks:
        keys_csv = ", ".join(parents_needing_subtasks)
        print(f"\nBACKFILL_SUBTASKS: {len(parents_needing_subtasks)} parents have no subtasks in data.")
        print(f"  JQL: parent in ({keys_csv})")
        print("  Feed result back as an extra input file and re-run.")

    if args.tickets_out:
        tickets = to_tickets_json(parents)
        # ensure_ascii=False writes raw non-ASCII text, so the file encoding
        # must be pinned rather than left to the locale default.
        with open(args.tickets_out, "w", encoding="utf-8") as f:
            json.dump(tickets, f, indent=2, ensure_ascii=False)
        print(f"\n=> Wrote {len(tickets)} parent tickets to {args.tickets_out}", file=sys.stderr)

    if args.sprint_out:
        if sprint:
            with open(args.sprint_out, "w", encoding="utf-8") as f:
                json.dump(sprint, f, indent=2)
            print(f"=> Wrote sprint metadata to {args.sprint_out}", file=sys.stderr)
        else:
            # Say so explicitly instead of silently skipping the requested file.
            print(f"WARNING: no active sprint found; {args.sprint_out} was not written", file=sys.stderr)


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
