#!/usr/bin/env python3
"""
Normalize absolute paths in data/findings.db to repo-relative paths.

Goals:
- Make findings portable across machines (no /Users/... paths in DB).
- Ensure scan_briefing_path points to an existing file (prefer archived path if present).

This script is idempotent.

Usage:
  python3 scripts/db_normalize_paths.py
  python3 scripts/db_normalize_paths.py --dry-run
"""

from __future__ import annotations

import argparse
import sqlite3
from pathlib import Path


ROOT = Path(__file__).resolve().parents[1]
DB_PATH = ROOT / "data" / "findings.db"


def to_rel(p: str) -> str:
    s = (p or "").strip()
    if not s:
        return s

    path = Path(s)
    # Normalize any absolute path under repo root to relative.
    if path.is_absolute():
        try:
            rel = path.resolve().relative_to(ROOT.resolve())
            return str(rel)
        except Exception:
            return s
    return s


def prefer_existing_scan_path(scan_path: str) -> str:
    """
    If scan_path doesn't exist but the archived equivalent does, rewrite to archive.
    """
    s = (scan_path or "").strip()
    if not s:
        return s

    rel = Path(to_rel(s))
    abs_path = (ROOT / rel).resolve()
    if abs_path.exists():
        return str(rel)

    # If a scan briefing has been archived, use the archive path.
    if rel.parts and rel.parts[0] == "briefings":
        # briefings/SCAN_x.md -> briefings/archive/SCAN_x.md
        if len(rel.parts) == 2 and rel.parts[1].startswith("SCAN_"):
            candidate = ROOT / "briefings" / "archive" / rel.parts[1]
            if candidate.exists():
                return str(Path("briefings") / "archive" / rel.parts[1])

    return str(rel)


def main() -> int:
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    if not DB_PATH.exists():
        raise SystemExit(f"DB not found: {DB_PATH}")

    conn = sqlite3.connect(str(DB_PATH))
    conn.row_factory = sqlite3.Row
    try:
        rows = conn.execute(
            """
            SELECT id, report_path, scan_briefing_path
            FROM findings
            WHERE (report_path IS NOT NULL AND report_path != '')
               OR (scan_briefing_path IS NOT NULL AND scan_briefing_path != '');
            """
        ).fetchall()

        updates: list[tuple[int, str, str]] = []
        for r in rows:
            rid = int(r["id"])
            rp_old = (r["report_path"] or "").strip()
            sp_old = (r["scan_briefing_path"] or "").strip()

            rp_new = to_rel(rp_old)
            sp_new = prefer_existing_scan_path(sp_old)

            if rp_new != rp_old or sp_new != sp_old:
                updates.append((rid, rp_new, sp_new))

        if not updates:
            print("No path updates needed.")
            return 0

        for rid, rp_new, sp_new in updates:
            print(f"id={rid} report_path={rp_new!r} scan_briefing_path={sp_new!r}")

        if args.dry_run:
            print(f"Dry run: {len(updates)} rows would be updated.")
            return 0

        conn.executemany(
            "UPDATE findings SET report_path=?, scan_briefing_path=?, updated_at=datetime('now') WHERE id=?;",
            [(rp_new, sp_new, rid) for (rid, rp_new, sp_new) in updates],
        )
        conn.commit()
        print(f"Updated {len(updates)} rows.")
        return 0
    finally:
        conn.close()


if __name__ == "__main__":
    raise SystemExit(main())

