# blocks-transformer/block.py

from __future__ import annotations

import math
from pathlib import Path
from typing import Dict, List, Sequence, Tuple, TypedDict

import joblib


# Directory containing this module; the cutoff CSV and the calibrator
# artifacts are resolved relative to it.
MODELS_DIR = Path(__file__).parent
GRADE_FILE = MODELS_DIR / "grade_cutoffs.csv"

# Calibrator artifact path for each model key.
ISOTONIC_FILES = {
    "a": MODELS_DIR.joinpath("isotonic_model_A.joblib"),
    "b": MODELS_DIR.joinpath("isotonic_model_B.joblib"),
}

# Lazily populated caches: the grade table stays None until it has been
# loaded, and calibrators are stored under their model key once loaded.
_GRADE_TABLE: List[Tuple[str, float, float]] | None = None
_ISOTONIC_MODELS: Dict[str, object] = {}


class ScoreEntry(TypedDict):
    """Typed shape of one named score: a ``name`` paired with a float ``value``.

    NOTE(review): nothing in the visible part of this module references this
    type; it presumably describes one ``{"name": ..., "value": ...}`` entry of
    a ``pd_scores``-style request list — confirm against the caller.
    """

    name: str  # label identifying the score
    value: float  # numeric value of the score


def _load_grade_table() -> None:
    """Populate the module-level grade table from ``GRADE_FILE`` (once).

    The file is read as UTF-8 text: the first line is treated as a header and
    skipped, and every following non-blank line must contain exactly three
    comma-separated fields ``grade,min_pd,max_pd``. Rows are stored as
    ``(grade, float(min_pd), float(max_pd))`` tuples sorted by ``min_pd``.

    Subsequent calls are no-ops once the table has been loaded. A file with no
    data rows (including a completely empty file) produces an empty table.

    Raises:
        OSError: if ``GRADE_FILE`` cannot be opened.
        ValueError: if a data row does not have three fields or a bound is
            not parseable as a float.
    """
    global _GRADE_TABLE
    if _GRADE_TABLE is not None:
        return

    table: List[Tuple[str, float, float]] = []
    with GRADE_FILE.open("r", encoding="utf-8") as handle:
        # The default keeps a zero-byte file from leaking StopIteration.
        next(handle, None)  # skip header
        for line_number, line in enumerate(handle, start=2):
            row = line.strip()
            if not row:
                # Blank lines (typically a trailing newline at end of file)
                # carry no data and must not break the unpacking below.
                continue
            fields = row.split(",")
            if len(fields) != 3:
                raise ValueError(
                    f"{GRADE_FILE}:{line_number}: expected 3 comma-separated "
                    f"fields, got {len(fields)}"
                )
            grade, min_pd, max_pd = fields
            table.append((grade, float(min_pd), float(max_pd)))

    # Publish only after the whole file parsed, so a failure leaves the
    # cache unset and a later call can retry.
    table.sort(key=lambda entry: entry[1])
    _GRADE_TABLE = table


def _ensure_isotonic_models_loaded() -> None:
    """Load every calibrator in ``ISOTONIC_FILES`` that is not cached yet.

    Keys already present in ``_ISOTONIC_MODELS`` are left untouched, so repeat
    calls only deserialize what is still missing. Any error raised by
    ``joblib.load`` propagates to the caller.
    """
    for model_key, artifact_path in ISOTONIC_FILES.items():
        if model_key not in _ISOTONIC_MODELS:
            # NOTE: joblib artifacts are pickle-based; only trusted files
            # should ever be placed at these paths.
            _ISOTONIC_MODELS[model_key] = joblib.load(artifact_path)


def _clamp_probability(value: float) -> float:
    return min(1.0, max(0.0, float(value)))


def _determine_grade(final_pd: float) -> str | None:
    """Return the grade whose ``[min_pd, max_pd)`` band contains ``final_pd``.

    Bands are taken from the module-level ``_GRADE_TABLE`` in table order and
    the first matching row wins. A value that sits (within ``math.isclose``
    tolerance) on the upper bound of the table's last row is assigned that
    last row's grade, since the half-open bands would otherwise exclude it.

    Args:
        final_pd: probability to grade; ``None`` is tolerated.

    Returns:
        The matching grade, or ``None`` when ``final_pd`` is ``None``, the
        table has not been loaded or has no rows, or no band matches.
    """
    # `not _GRADE_TABLE` covers both "not loaded" (None) and "loaded but
    # empty"; the latter would otherwise raise IndexError on the [-1] below.
    if final_pd is None or not _GRADE_TABLE:
        return None

    for grade, min_pd, max_pd in _GRADE_TABLE:
        if min_pd <= final_pd < max_pd:
            return grade

    # Allow equality with the top boundary to fall into the final grade.
    last_grade, _, last_max_pd = _GRADE_TABLE[-1]
    if math.isclose(final_pd, last_max_pd):
        return last_grade
    return None


def _apply_isotonic(model_key: str, raw_pd: float) -> float:
    """Calibrate ``raw_pd`` with the cached model for ``model_key``.

    The calibrator is looked up in ``_ISOTONIC_MODELS``; its ``predict`` is
    called with a one-element list and the first prediction is clamped to
    [0, 1]. When no calibrator is cached under ``model_key`` the raw value is
    clamped and returned unchanged otherwise.

    NOTE(review): if a calibrator can emit NaN (e.g. a scikit-learn
    ``IsotonicRegression`` fitted with ``out_of_bounds="nan"`` — presumably
    what these artifacts are, confirm), the clamp maps it to 0.0.
    """
    model = _ISOTONIC_MODELS.get(model_key)
    if model is not None:
        prediction = model.predict([raw_pd])
        return _clamp_probability(prediction[0])
    # No calibrator available for this key: fall back to the raw value.
    return _clamp_probability(raw_pd)


def __main__(pd_a: float, pd_b: float, pd_t: float) -> Dict[str, float | str]:
    """Blend two isotonic-calibrated PDs into a final PD and grade it.

    Request schema notes (how the request payload is mapped onto the three
    parameters is not visible in this function — presumably done by the
    caller):
    - pd_scores: ordered list of {"name": "pd_a"|"pd_b"|"pd_t", "value": <float>} entries
    - pd_scores_pd_a / pd_scores_pd_b / pd_scores_pd_t: explicit, non-null PD inputs; must match pd_scores when provided

    Args:
        pd_a: raw PD from model A.
        pd_b: raw PD from model B.
        pd_t: model T probability; after clamping it is the weight given to
            the calibrated PD A, with the remainder going to calibrated PD B.

    Returns:
        A dict with the keys:
        - pd_a: raw PD A clamped to [0,1]
        - pd_b: raw PD B clamped to [0,1]
        - pd_t: model T probability clamped to [0,1]
        - pd_iso_a: isotonic-calibrated PD A
        - pd_iso_b: isotonic-calibrated PD B
        - final_pd: pd_iso_a * pd_t + pd_iso_b * (1 - pd_t), using the clamped pd_t
        - grade: grade from the cutoff table, or "" when none is assigned
    """
    # Both loaders cache their results, so they are cheap after the first call.
    _load_grade_table()
    _ensure_isotonic_models_loaded()

    blend_weight = _clamp_probability(pd_t)
    calibrated_a = _apply_isotonic("a", pd_a)
    calibrated_b = _apply_isotonic("b", pd_b)

    blended_pd = (calibrated_a * blend_weight) + (calibrated_b * (1 - blend_weight))

    assigned_grade = _determine_grade(blended_pd)
    if assigned_grade is None:
        # The response always carries a string grade; empty means "no grade".
        assigned_grade = ""

    return {
        "pd_a": _clamp_probability(pd_a),
        "pd_b": _clamp_probability(pd_b),
        "pd_t": blend_weight,
        "pd_iso_a": calibrated_a,
        "pd_iso_b": calibrated_b,
        "final_pd": blended_pd,
        "grade": assigned_grade,
    }