#!/usr/bin/env python3
"""
Fetch April hourly data for every year 1985-2026 (2026 is partial, Apr 1-20),
compute per-year key metrics, and write bangalore_yearly.json for time-series
trend analysis.

Answers the question: is 2026 an aberration or the tail of a trend?
"""

import json
import time
import urllib.parse
import urllib.request
from datetime import date
from pathlib import Path
from statistics import mean

# Directory that holds this script; the output JSON is written next to it.
HERE = Path(__file__).resolve().parents[0]

# Archive endpoint and the query settings shared by every request.
ARCHIVE_URL = "https://archive-api.open-meteo.com/v1/archive"
LAT = 12.9716
LON = 77.5946
TZ = "Asia/Kolkata"
VARS = ["temperature_2m"]

# Every year from 1985 through 2026 (the range end, 2027, is exclusive).
YEARS = [*range(1985, 2027)]

# (month, day) of the last date requested for 2026. Taking today as
# 2026-04-21, the most recent complete day is Apr 20.
PARTIAL_2026_END = (4, 20)

# Hours of the day treated as night: 22:00 through 06:59.
NIGHT_HOURS = {22, 23, *range(7)}


def fetch_year(year: int):
    """Download the hourly April series for *year* from the archive API.

    The request covers Apr 1 through Apr 30, except for 2026, where it stops
    at the (month, day) given by PARTIAL_2026_END.

    Args:
        year: Calendar year to request.

    Returns:
        The "hourly" object of the decoded JSON response, or an empty dict
        when the response has no such key.

    Errors raised by the HTTP request or by JSON decoding are not caught here.
    """
    if year == 2026:
        end_month, end_day = PARTIAL_2026_END
    else:
        end_month, end_day = 4, 30

    query = urllib.parse.urlencode(
        {
            "latitude": LAT,
            "longitude": LON,
            "start_date": f"{year}-04-01",
            "end_date": f"{year}-{end_month:02d}-{end_day:02d}",
            "hourly": ",".join(VARS),
            "timezone": TZ,
        }
    )
    request = urllib.request.Request(
        f"{ARCHIVE_URL}?{query}",
        headers={"User-Agent": "bangalore-is-hot-yearly/1.0"},
    )
    with urllib.request.urlopen(request, timeout=60) as response:
        payload = json.loads(response.read().decode("utf-8"))
    return payload.get("hourly", {})


def compute_year(year: int, hourly: dict) -> dict:
    """Reduce one year's hourly temperatures to its summary metrics.

    Args:
        year: Calendar year; copied into the result unchanged.
        hourly: Mapping with parallel "time" and "temperature_2m" lists.
            Each timestamp is split on "T" into a date and a time whose
            first two characters are the hour.

    Returns:
        A dict with the year, the number of dates that had at least one
        non-None reading ("n_days"), the mean and extreme daily minima and
        maxima, and the threshold hour counts divided by "n_days". Every
        metric is None when no reading is valid.
    """
    stamps = hourly["time"]
    readings = hourly["temperature_2m"]

    by_date = {}
    cool_night_hours = 0
    # Hours at or above each threshold, keyed by the threshold itself.
    warm_night_hours = {22: 0, 24: 0, 26: 0}
    hot_day_hours = {30: 0, 32: 0}

    for idx, stamp in enumerate(stamps):
        reading = readings[idx]
        if reading is None:
            # Gaps in the series contribute to no metric at all.
            continue

        day_key, clock = stamp.split("T")
        # Any hour outside NIGHT_HOURS is tallied as daytime.
        if int(clock[:2]) in NIGHT_HOURS:
            cool_night_hours += reading < 22
            tally = warm_night_hours
        else:
            tally = hot_day_hours
        for threshold in tally:
            tally[threshold] += reading >= threshold

        by_date.setdefault(day_key, []).append(reading)

    day_lows = [min(vals) for vals in by_date.values()]
    day_highs = [max(vals) for vals in by_date.values()]
    n_days = len(by_date)

    def per_date(hours):
        """Average an hour count over the dates seen, or None if none were."""
        return hours / n_days if n_days else None

    return {
        "year": year,
        "n_days": n_days,
        "avg_daily_min": mean(day_lows) if day_lows else None,
        "avg_daily_max": mean(day_highs) if day_highs else None,
        "min_daily_min": min(day_lows) if day_lows else None,
        "max_daily_max": max(day_highs) if day_highs else None,
        # Threshold hour counts averaged over the dates with data
        "night_below_22_hrs_per_night": per_date(cool_night_hours),
        "night_ge_22_hrs_per_night": per_date(warm_night_hours[22]),
        "night_ge_24_hrs_per_night": per_date(warm_night_hours[24]),
        "night_ge_26_hrs_per_night": per_date(warm_night_hours[26]),
        "day_ge_30_hrs_per_day": per_date(hot_day_hours[30]),
        "day_ge_32_hrs_per_day": per_date(hot_day_hours[32]),
    }


def main():
    """Fetch and summarise every year in YEARS, then write the JSON report.

    Each year is downloaded and reduced independently, with a one-second
    pause between requests. A year whose download or computation raises,
    whose response has no timestamps, or whose readings are all missing is
    recorded under "failed" and never under "years", so the two lists stay
    disjoint. The report is written to bangalore_yearly.json next to this
    script even when some years fail.
    """
    results = []
    failed = []

    for y in YEARS:
        print(f"  fetching {y}...", flush=True)
        # Only the steps that can legitimately fail for a single year sit in
        # the try body; appending and printing happen in the else branch so a
        # year can never land in both `results` and `failed`.
        try:
            hourly = fetch_year(y)
            if not hourly.get("time"):
                raise RuntimeError("empty hourly data")
            stats = compute_year(y, hourly)
            if not stats["n_days"]:
                # All readings were None: every metric is None, which is
                # useless for the trend and cannot be formatted as a float.
                raise RuntimeError("no valid temperature readings")
        except Exception as e:
            print(f"    !! failed for {y}: {e}", flush=True)
            failed.append({"year": y, "error": str(e)})
        else:
            results.append(stats)
            mn = stats["avg_daily_min"]
            mx = stats["avg_daily_max"]
            coolhrs = stats["night_below_22_hrs_per_night"]
            stresshrs = stats["day_ge_32_hrs_per_day"]
            print(f"    {y}: min={mn:5.2f}, max={mx:5.2f}, cool_hrs/night={coolhrs:4.2f}, ≥32_hrs/day={stresshrs:4.2f}")
        # Pause between requests regardless of the outcome.
        time.sleep(1.0)

    out = {
        "location": {"lat": LAT, "lon": LON, "timezone": TZ},
        "note": "April per-year metrics. 2026 covers Apr 1-20 only.",
        "failed": failed,
        "years": results,
    }
    (HERE / "bangalore_yearly.json").write_text(json.dumps(out, indent=2), encoding="utf-8")
    print(f"\nwrote bangalore_yearly.json ({len(results)} years, {len(failed)} failed)")


# Run the fetch-and-summarise pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
