"""
URL safety checking service using the VirusTotal API.

Checks URLs against VirusTotal's database and caches results locally
to avoid repeated API calls. Results are cached for 7 days.
"""

from __future__ import annotations

import base64
import datetime as dt
import hashlib
import logging
from typing import Optional

import httpx

from ..config import Config
from ..models import UrlSafetyCheck

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Number of days a stored check result is treated as fresh by check_url
# before VirusTotal is queried again.
CACHE_DAYS = 7


def _url_hash(url: str) -> str:
    """Return the hex SHA-256 digest used as the cache key for *url*.

    The only normalization applied is removal of leading and trailing
    whitespace; the trimmed text is then encoded with the default codec
    (UTF-8) and hashed.
    """
    trimmed = url.strip()
    digest = hashlib.sha256(trimmed.encode())
    return digest.hexdigest()


def _classify(detections: int) -> str:
    """Map a detection count to a status label.

    Counts up to and including 1 are "safe", counts up to and including 5
    are "warning", and anything above that is "dangerous".
    """
    # Inclusive upper bounds, checked in ascending order.
    ceilings = ((1, "safe"), (5, "warning"))
    for ceiling, label in ceilings:
        if detections <= ceiling:
            return label
    return "dangerous"


# Statuses that do not represent a finished VirusTotal verdict. They are never
# written to the cache and never served from it, so a URL that was only just
# submitted (or whose lookup failed) is re-queried on the next call instead of
# being reported as "pending"/"error" for the whole cache lifetime.
_TRANSIENT_STATUSES = frozenset({"pending", "error"})

_VT_URLS_ENDPOINT = "https://www.virustotal.com/api/v3/urls"
_VT_TIMEOUT_SECONDS = 10


def _utc_now() -> dt.datetime:
    """Return the current UTC time as a naive datetime.

    Produces the same value as the deprecated ``datetime.utcnow()``.
    """
    return dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)


def _make_result(
    url: str,
    detections: int,
    total_scanners: int,
    status: str,
    *,
    cached: bool = False,
    error: Optional[str] = None,
) -> dict:
    """Build the result dict returned by check_url.

    The "error" key is only present when *error* is given.
    """
    result = {
        "url": url,
        "detections": detections,
        "total_scanners": total_scanners,
        "status": status,
        "cached": cached,
    }
    if error is not None:
        result["error"] = error
    return result


def _query_virustotal(url: str, api_key: str) -> dict:
    """Ask VirusTotal about *url* and return an uncached result dict.

    Raises:
        httpx.HTTPError: if a request fails at the transport level.
        ValueError: if a 200 response body is not valid JSON or does not
            have the expected object structure.
    """
    headers = {"x-apikey": api_key}
    # Use the same whitespace-trimmed form that _url_hash uses for the cache
    # key, so the cache entry and the VirusTotal lookup refer to the same URL.
    target = url.strip()

    # VirusTotal v3 API: URL must be base64-encoded (without padding)
    url_id = base64.urlsafe_b64encode(target.encode()).decode().rstrip("=")
    resp = httpx.get(
        f"{_VT_URLS_ENDPOINT}/{url_id}",
        headers=headers,
        timeout=_VT_TIMEOUT_SECONDS,
    )

    if resp.status_code == 404:
        # URL not in VT database — submit it for scanning
        submission = httpx.post(
            _VT_URLS_ENDPOINT,
            headers=headers,
            data={"url": target},
            timeout=_VT_TIMEOUT_SECONDS,
        )
        if submission.status_code >= 400:
            logger.warning(
                "VirusTotal submission returned %d for %s",
                submission.status_code,
                url,
            )
        # No verdict yet; a later call will query VirusTotal again.
        return _make_result(url, 0, 0, "pending")

    if resp.status_code == 200:
        try:
            attributes = resp.json().get("data", {}).get("attributes", {})
            stats = attributes.get("last_analysis_stats", {})
            detections = stats.get("malicious", 0) + stats.get("suspicious", 0)
            total = sum(stats.values()) if stats else 0
        except (AttributeError, TypeError) as exc:
            # Valid JSON, but not the nested-object shape read above.
            raise ValueError("Unexpected VirusTotal response structure") from exc
        return _make_result(url, detections, total, _classify(detections))

    logger.warning("VirusTotal API returned %d for %s", resp.status_code, url)
    return _make_result(
        url, 0, 0, "error", error=f"VT API returned {resp.status_code}"
    )


def check_url(session, url: str) -> dict:
    """Check a URL against VirusTotal, using cache when available.

    A stored UrlSafetyCheck row is reused when it holds a finished verdict
    and is younger than CACHE_DAYS days. Otherwise VirusTotal is queried;
    a finished verdict is written to the session (updating the existing row
    or adding a new one). "pending" and "error" outcomes are returned but
    never stored, so the next call retries.

    This function does not commit the session.

    Args:
        session: Object providing ``query(...)`` and ``add(...)``
            (presumably a SQLAlchemy session — confirm against callers).
        url: The URL to check.

    Returns:
        A dict with: url, detections, total_scanners, status, cached, and
        an "error" message when the check could not be completed. Status is
        one of "safe", "warning", "dangerous", "pending", "error", or
        "unknown" (no API key configured).
    """
    h = _url_hash(url)

    # Check cache first
    cached = session.query(UrlSafetyCheck).filter_by(url_hash=h).one_or_none()
    if cached is not None and cached.status not in _TRANSIENT_STATUSES:
        age = _utc_now() - cached.checked_at
        if age.days < CACHE_DAYS:
            return _make_result(
                url,
                cached.detections,
                cached.total_scanners,
                cached.status,
                cached=True,
            )

    # Call VirusTotal API
    api_key = Config.VIRUSTOTAL_API_KEY
    if not api_key:
        return _make_result(
            url, 0, 0, "unknown", error="No VirusTotal API key configured"
        )

    try:
        result = _query_virustotal(url, api_key)
    except (httpx.HTTPError, ValueError) as e:
        # ValueError covers an undecodable or unexpectedly shaped 200 body.
        logger.error("VirusTotal request failed for %s: %s", url, e)
        return _make_result(url, 0, 0, "error", error=str(e))

    if result["status"] in _TRANSIENT_STATUSES:
        # Leave any existing row untouched; nothing definitive to record.
        return result

    # Cache the result
    if cached is not None:
        cached.detections = result["detections"]
        cached.total_scanners = result["total_scanners"]
        cached.status = result["status"]
        cached.checked_at = _utc_now()
    else:
        # checked_at is not passed here; presumably the model supplies a
        # default on insert — confirm against UrlSafetyCheck.
        session.add(UrlSafetyCheck(
            url_hash=h,
            url=url,
            detections=result["detections"],
            total_scanners=result["total_scanners"],
            status=result["status"],
        ))

    return result
