#!/usr/bin/env python3
import re
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Root URL of the instance whose pages get prewarmed; every other URL is derived from it.
BASE = "https://ballbox-first.emperor-ratio.ts.net/rss/"
# Passed as the `timeout=` argument of every request.
TIMEOUT = 20
# Sent with every request so the prewarm traffic is identifiable by its user agent.
HEADERS = {"user-agent": "Mozilla/5.0 (FreshRSS prewarm)"}
# Pages that are always requested: the root itself, then three fixed views of it.
STATIC_PATHS = [BASE] + [
    urljoin(BASE, relative)
    for relative in ("./?a=normal", "./?a=normal&get=i", "./?a=normal&get=s")
]


# Query of a per-feed page: "?get=f_<digits>", optionally preceded by "a=normal&".
_FEED_LINK_RE = re.compile(r"\?(?:a=normal&)?get=f_\d+")


def collect_feed_pages(html: str, base_url: str) -> list[str]:
    """Return the per-feed page URLs linked from *html*, without duplicates.

    Args:
        html: Markup to scan for ``<a href=...>`` elements.
        base_url: URL that relative hrefs are resolved against.

    Returns:
        Absolute URLs that start with ``BASE`` and whose query selects a
        feed (``get=f_<digits>``), in order of first appearance.
    """
    soup = BeautifulSoup(html, "html.parser")
    resolved = (urljoin(base_url, anchor["href"]) for anchor in soup.find_all("a", href=True))
    # Filter against BASE itself rather than a second copy of the literal, so
    # the scope check cannot drift from the URL the script actually targets.
    feed_urls = (
        url
        for url in resolved
        if url.startswith(BASE) and _FEED_LINK_RE.search(url)
    )
    # dict keys keep insertion order, so this drops repeats but preserves
    # the order in which links first appear in the page.
    return list(dict.fromkeys(feed_urls))


def fetch(session: requests.Session, url: str) -> None:
    """GET *url* through *session*, print its status line, and fail on an error status.

    Args:
        session: Session used to issue the request.
        url: Address to request.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status()`` when the
            response carries an error status.
    """
    reply = session.get(url, headers=HEADERS, timeout=TIMEOUT)
    # The status is printed before the check so failing URLs are logged too.
    print(reply.status_code, url)
    reply.raise_for_status()


def main() -> int:
    """Request the root page, the static views, and every feed page linked from the root.

    Returns:
        0 once every request has completed without an error status.

    Raises:
        requests.HTTPError: When any response carries an error status
            (from ``raise_for_status()``). Other exceptions raised by
            ``requests`` while sending a request propagate unchanged.
    """
    # The context manager closes the session on every exit path, including
    # when one of the requests below raises.
    with requests.Session() as session:
        first = session.get(BASE, timeout=TIMEOUT, headers=HEADERS)
        # Print before checking, as fetch() does, so a failing status on the
        # root page is still logged.
        print(first.status_code, BASE)
        first.raise_for_status()

        # STATIC_PATHS[0] is BASE, which has just been requested above.
        for path in STATIC_PATHS[1:]:
            fetch(session, path)

        # Relative links are resolved against the URL recorded on the response.
        for link in collect_feed_pages(first.text, first.url):
            fetch(session, link)

    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
