diff --git a/README.md b/README.md
index 59a3efc..6c4416b 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,23 @@
-**Hello world!!!**
+# PD V3 Post-Processing
+
+- **Inputs:** Raw/isotonic PD outputs and model T weight from the processing block.
+- **Outputs:** Final weighted PD and assigned grade (see the worked example below).
+- **Artifacts:** `grade_cutoffs.csv` generated from the Weighted Grades Cutoff workbook tab.
+- **Tests:** `python -m unittest sequence-3.pd_v3_post_processing.test_block`.
+- **Signature:** Sequence-3 convention: `__main__` must keep an explicit typed parameter list covering every input (int/float/str) and build the record from those args before weighting/grades; keep aligned with the block schemas.
+- **UAT tolerance:** Downstream sequence-3 UAT scripts treat post-processing mismatches within `1e-4` as equivalent to expected values.
+- **PD inputs:** `pd_a`, `pd_b`, and `pd_t` must be provided as non-null numbers (per schema); post-processing raises if any are missing/null.
+
+## Schema notes
+
+- The request and response schemas for post-processing are treated as immutable contracts. They describe flat scalar fields only, so keep them as-is and never refactor them into dict-of-dicts/object-of-dicts; arrays of dicts remain acceptable.
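+
+## Worked example
+
+A minimal illustrative sketch (the input values below are made up, not taken from the workbook): the block blends the two isotonic-calibrated PDs using `pd_t` as the weight, then looks the result up in `grade_cutoffs.csv`.
+
+```python
+pd_iso_a, pd_iso_b, pd_t = 0.04, 0.08, 0.6  # hypothetical calibrated PDs and weight
+final_pd = pd_iso_a * pd_t + pd_iso_b * (1 - pd_t)  # 0.024 + 0.032 = 0.056
+# 0.056 falls in the A2 row of grade_cutoffs.csv (0.0500000001 to 0.1) -> grade "A2"
+```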
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..d3d26ed
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1 @@
+__all__ = ["__main__"]
diff --git a/block.py b/block.py
index 3b227f9..199ebb6 100644
--- a/block.py
+++ b/block.py
@@ -1,21 +1,111 @@
-@flowx_block
-def example_function(request: dict) -> dict:
-
-    # Processing logic here...
-
-    return {
-        "meta_info": [
-            {
-                "name": "created_date",
-                "type": "string",
-                "value": "2024-11-05"
-            }
-        ],
-        "fields": [
-            {
-                "name": "",
-                "type": "",
-                "value": ""
-            }
-        ]
-    }
+from __future__ import annotations
+
+import math
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+import joblib
+
+
+MODELS_DIR = Path(__file__).parent
+GRADE_FILE = Path(__file__).parent / "grade_cutoffs.csv"
+
+_GRADE_TABLE: List[Tuple[str, float, float]] | None = None
+_ISOTONIC_MODELS: Dict[str, object] = {}
+ISOTONIC_FILES = {
+    "a": MODELS_DIR / "isotonic_model_A.joblib",
+    "b": MODELS_DIR / "isotonic_model_B.joblib",
+}
+
+
+def _load_grade_table() -> None:
+    global _GRADE_TABLE
+    if _GRADE_TABLE is not None:
+        return
+
+    table: List[Tuple[str, float, float]] = []
+    with GRADE_FILE.open("r", encoding="utf-8") as handle:
+        next(handle)  # skip header
+        for line in handle:
+            grade, min_pd, max_pd = line.strip().split(",")
+            table.append((grade, float(min_pd), float(max_pd)))
+
+    table.sort(key=lambda row: row[1])
+    _GRADE_TABLE = table
+
+
+def _ensure_isotonic_models_loaded() -> None:
+    for key, path in ISOTONIC_FILES.items():
+        if key in _ISOTONIC_MODELS:
+            continue
+        _ISOTONIC_MODELS[key] = joblib.load(path)
+
+
+def _clamp_probability(value: float) -> float:
+    return min(1.0, max(0.0, float(value)))
+
+
+def _determine_grade(final_pd: float) -> str | None:
+    if final_pd is None or _GRADE_TABLE is None:
+        return None
+
+    # Cutoff rows do not overlap (each min_pd sits an epsilon above the
+    # previous max_pd), so both bounds are treated as inclusive.
+    for grade, min_pd, max_pd in _GRADE_TABLE:
+        if min_pd <= final_pd <= max_pd:
+            return grade
+    # Guard against float fuzz at the top boundary: values numerically
+    # indistinguishable from the last max_pd fall into the final grade.
+    last_grade, _, last_max = _GRADE_TABLE[-1]
+    if math.isclose(final_pd, last_max):
+        return last_grade
+    return None
+
+
+def _apply_isotonic(model_key: str, raw_pd: float) -> float:
+    calibrator = _ISOTONIC_MODELS.get(model_key)
+    if calibrator is None:
+        return _clamp_probability(raw_pd)
+    calibrated = calibrator.predict([raw_pd])[0]
+    return _clamp_probability(calibrated)
+
+
+def __main__(
+    pd_a: float,
+    pd_b: float,
+    pd_t: float,
+) -> Dict[str, float | str]:
+    """
+    Inputs (request schema):
+    - pd_a / pd_b / pd_t: explicit, non-null PD inputs (flat scalars per the request schema)
+
+    Outputs (response schema):
+    - pd_a: raw PD A clamped to [0,1]
+    - pd_b: raw PD B clamped to [0,1]
+    - pd_t: model T probability clamped to [0,1]
+    - pd_iso_a: isotonic-calibrated PD A
+    - pd_iso_b: isotonic-calibrated PD B
+    - final_pd: weighted final PD using pd_t as weight
+    - grade: assigned grade from the cutoff table
+    """
+
+    _load_grade_table()
+    _ensure_isotonic_models_loaded()
+
+    weight = _clamp_probability(pd_t)
+    pd_iso_a = _apply_isotonic("a", pd_a)
+    pd_iso_b = _apply_isotonic("b", pd_b)
+
+    final_pd = (pd_iso_a * weight) + (pd_iso_b * (1 - weight))
+
+    grade = _determine_grade(final_pd)
+
+    return {
+        "pd_a": _clamp_probability(pd_a),
+        "pd_b": _clamp_probability(pd_b),
+        "pd_t": weight,
+        "pd_iso_a": pd_iso_a,
+        "pd_iso_b": pd_iso_b,
+        "final_pd": final_pd,
+        "grade": grade if grade is not None else "",
+    }
diff --git a/grade_cutoffs.csv b/grade_cutoffs.csv
new file mode 100644
index 0000000..cfc4ae8
--- /dev/null
+++ b/grade_cutoffs.csv
@@ -0,0 +1,7 @@
+grade,min_pd,max_pd
+A1,0.0000000000,0.05
+A2,0.0500000001,0.1
+B1,0.1000000001,0.15
+B2,0.1500000001,0.3
+C1,0.3000000001,0.35
+C2,0.3500000001,1
diff --git a/isotonic_model_A.joblib b/isotonic_model_A.joblib
new file mode 100644
index 0000000..d917c32
Binary files /dev/null and b/isotonic_model_A.joblib differ
diff --git a/isotonic_model_B.joblib b/isotonic_model_B.joblib
new file mode 100644
index 0000000..ae9be55
Binary files /dev/null and b/isotonic_model_B.joblib differ
diff --git a/request_schema.json b/request_schema.json
index 0967ef4..23f59b7 100644
--- a/request_schema.json
+++ b/request_schema.json
@@ -1 +1,11 @@
-{}
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "pd_a": { "type": "number" },
+    "pd_b": { "type": "number" },
+    "pd_t": { "type": "number" }
+  },
+  "required": ["pd_a", "pd_b", "pd_t"],
+  "additionalProperties": false
+}
diff --git a/requirements.txt b/requirements.txt
index 0967ef4..1ecb4df 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
-{}
+joblib==1.5.2
+scikit-learn==1.7.2
\ No newline at end of file
diff --git a/response_schema.json b/response_schema.json
index 0967ef4..639f9eb 100644
--- a/response_schema.json
+++ b/response_schema.json
@@ -1 +1,16 @@
-{}
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "pd_a": { "type": "number" },
+    "pd_b": { "type": "number" },
+    "pd_t": { "type": "number" },
+    "pd_iso_a": { "type": "number" },
+    "pd_iso_b": { "type": "number" },
+    "final_pd": { "type": "number" },
+    "grade": { "type": "string" }
+  },
+  "required": ["final_pd", "grade", "pd_a", "pd_b", "pd_t", "pd_iso_a", "pd_iso_b"],
+  "additionalProperties": false
+}
+
diff --git a/test_block.py b/test_block.py
new file mode 100644
index 0000000..65ba8ef
--- /dev/null
+++ b/test_block.py
@@ -0,0 +1,31 @@
+import unittest
+from block import __main__
+
+data = {'pd_a': 0.030282551422715187, 'pd_b': 0.07098247110843658, 'pd_t': 0.6349245309829712}
+
+class TestBlock(unittest.TestCase):
+    def test_main_returns_scores(self):
+        block_result = __main__(**data)
+        print(block_result)
+        self.assertIsInstance(block_result, dict)
+        self.assertIn("pd_a", block_result)
+        self.assertIn("pd_b", block_result)
+        self.assertIn("pd_t", block_result)
+        self.assertIn("pd_iso_a", block_result)
+        self.assertIn("pd_iso_b", block_result)
+        self.assertIn("final_pd", block_result)
+        self.assertIn("grade", block_result)
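+
+    def test_final_pd_matches_weighted_blend(self):
+        # Illustrative sketch, not part of the original suite: final_pd should
+        # equal the pd_t-weighted blend of the calibrated PDs returned by the
+        # block itself, judged with the 1e-4 tolerance the README gives for
+        # downstream UAT comparisons.
+        result = __main__(**data)
+        expected = result["pd_iso_a"] * result["pd_t"] + result["pd_iso_b"] * (1 - result["pd_t"])
+        self.assertAlmostEqual(result["final_pd"], expected, delta=1e-4)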
+
+if __name__ == "__main__":  # pragma: no cover
+    unittest.main()
+
+