From d167609fade677aab916c23425e8427aef352343 Mon Sep 17 00:00:00 2001 From: Ankur Malik Date: Thu, 4 Dec 2025 10:59:02 -0500 Subject: [PATCH] Add pd v3 post processing block --- README.md | 14 ++++- __init__.py | 1 + block.py | 133 ++++++++++++++++++++++++++++++++++------ grade_cutoffs.csv | 7 +++ isotonic_model_A.joblib | Bin 0 -> 1319 bytes isotonic_model_B.joblib | Bin 0 -> 1351 bytes request_schema.json | 12 +++- requirements.txt | 3 +- response_schema.json | 17 ++++- test_block.py | 22 +++++++ 10 files changed, 186 insertions(+), 23 deletions(-) create mode 100644 __init__.py create mode 100644 grade_cutoffs.csv create mode 100644 isotonic_model_A.joblib create mode 100644 isotonic_model_B.joblib create mode 100644 test_block.py diff --git a/README.md b/README.md index 59a3efc..6c4416b 100644 --- a/README.md +++ b/README.md @@ -1 +1,13 @@ -**Hello world!!!** +# PD V3 Post-Processing + +- **Inputs:** Raw/isotonic PD outputs and model T weight from the processing block. +- **Outputs:** Final weighted PD and assigned grade. +- **Artifacts:** `grade_cutoffs.csv` generated from the Weighted Grades Cutoff workbook tab. +- **Tests:** `python -m unittest sequence-3.pd_v3_post_processing.test_block`. +- **Signature:** Sequence-3 convention: `__main__` must keep an explicit typed parameter list covering every input (int/float/str) and build the record from those args before weighting/grades; keep aligned with the block schemas. +- **UAT tolerance:** Downstream sequence-3 UAT scripts treat post-processing mismatches within `1e-4` as equivalent to expected values. +- **PD inputs:** `pd_a`, `pd_b`, and `pd_t` must be provided as non-null numbers (per schema); post-processing raises if any are missing/null. + +## Schema notes + +- The request and response schemas for post-processing are treated as immutable contracts. 
# PD V3 post-processing block.
#
# Calibrates the raw PD A / PD B scores with per-model isotonic calibrators,
# blends them using the model-T probability as the weight, and maps the
# blended PD onto a grade using the cut-off table in ``grade_cutoffs.csv``.
from __future__ import annotations

import csv
import math
from pathlib import Path
from typing import Dict, List, Sequence, Tuple, TypedDict

try:
    import joblib
except ImportError:  # pragma: no cover - depends on the runtime image
    # Keep the pure-Python helpers importable; the missing dependency is
    # reported by _ensure_isotonic_models_loaded() when models are needed.
    joblib = None  # type: ignore[assignment]


MODELS_DIR = Path(__file__).parent
GRADE_FILE = Path(__file__).parent / "grade_cutoffs.csv"

# Cached (grade, min_pd, max_pd) rows sorted by min_pd; None until loaded.
_GRADE_TABLE: List[Tuple[str, float, float]] | None = None
# Cached calibrators keyed by model key ("a" / "b").
_ISOTONIC_MODELS: Dict[str, object] = {}
ISOTONIC_FILES = {
    "a": MODELS_DIR / "isotonic_model_A.joblib",
    "b": MODELS_DIR / "isotonic_model_B.joblib",
}


class ScoreEntry(TypedDict):
    """A single named PD score: ``{"name": ..., "value": ...}``."""

    name: str
    value: float


def _load_grade_table() -> None:
    """Load ``grade_cutoffs.csv`` into the module cache (first call only).

    The file is expected to have a header row followed by
    ``grade,min_pd,max_pd`` rows. Blank rows are ignored.

    Raises:
        ValueError: if a row does not have exactly three columns, a bound is
            not numeric, or the file contains no grade rows at all.
    """
    global _GRADE_TABLE
    if _GRADE_TABLE is not None:
        return

    table: List[Tuple[str, float, float]] = []
    with GRADE_FILE.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.reader(handle)
        next(reader, None)  # skip header (tolerates a completely empty file)
        for row in reader:
            if not any(cell.strip() for cell in row):
                continue  # trailing/blank lines are not data
            grade, min_pd, max_pd = (cell.strip() for cell in row)
            table.append((grade, float(min_pd), float(max_pd)))

    if not table:
        # Fail loudly here instead of with an IndexError during grading.
        raise ValueError(f"{GRADE_FILE.name} contains no grade rows")

    table.sort(key=lambda row: row[1])
    _GRADE_TABLE = table


def _ensure_isotonic_models_loaded() -> None:
    """Load any isotonic calibrator that is not cached yet.

    Raises:
        ImportError: if models still need loading and joblib is unavailable.
    """
    pending = {
        key: path
        for key, path in ISOTONIC_FILES.items()
        if key not in _ISOTONIC_MODELS
    }
    if not pending:
        return
    if joblib is None:
        raise ImportError(
            "joblib is required to load the isotonic calibration models"
        )
    for key, path in pending.items():
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code. Only the artifacts shipped next to this module are loaded;
        # never point ISOTONIC_FILES at untrusted paths.
        _ISOTONIC_MODELS[key] = joblib.load(path)


def _clamp_probability(value: float) -> float:
    """Return *value* as a float limited to the closed interval [0, 1].

    Raises:
        ValueError: if *value* is NaN. Plain ``min``/``max`` would silently
            turn NaN into 0.0, i.e. the best possible PD.
        TypeError: if *value* cannot be converted to float (e.g. ``None``).
    """
    number = float(value)
    if math.isnan(number):
        raise ValueError("probability must be a number, got NaN")
    return min(1.0, max(0.0, number))


def _determine_grade(
    final_pd: float | None,
    table: Sequence[Tuple[str, float, float]] | None = None,
) -> str | None:
    """Map *final_pd* onto a grade using upper-bound-inclusive bands.

    The cut-off file writes consecutive bands as ``A1 ... 0.05`` followed by
    ``A2 0.0500000001 ...``: each band includes its ``max_pd`` and the next
    one starts just above it. A PD is therefore assigned to the first band
    whose ``max_pd`` is greater than or equal to it, so values sitting exactly
    on a cut-off (or inside the 1e-10 representational gap between two bands)
    still receive a grade.

    Args:
        final_pd: the PD to grade; ``None`` yields ``None``.
        table: optional ``(grade, min_pd, max_pd)`` rows to use instead of the
            cached table loaded from ``grade_cutoffs.csv``.

    Returns:
        The grade label, or ``None`` when no table is available or the PD lies
        outside the range covered by the table.
    """
    if table is None:
        rows = _GRADE_TABLE  # already sorted by _load_grade_table()
    else:
        rows = sorted(table, key=lambda row: row[1])
    if final_pd is None or not rows:
        return None

    if final_pd < rows[0][1]:
        return None  # below the lowest band

    for grade, _min_pd, max_pd in rows:
        if final_pd <= max_pd:
            return grade

    # Tolerate float noise just above the top boundary.
    last_grade, _min_pd, top_pd = rows[-1]
    if math.isclose(final_pd, top_pd):
        return last_grade
    return None


def _apply_isotonic(model_key: str, raw_pd: float) -> float:
    """Calibrate *raw_pd* with the cached model for *model_key*.

    If no calibrator is cached under *model_key*, the raw PD is returned
    clamped to [0, 1]. The result of the calibrator is clamped as well.
    """
    calibrator = _ISOTONIC_MODELS.get(model_key)
    if calibrator is None:
        return _clamp_probability(raw_pd)
    calibrated = calibrator.predict([raw_pd])[0]
    return _clamp_probability(calibrated)


def __main__(
    pd_a: float,
    pd_b: float,
    pd_t: float,
) -> Dict[str, float | str]:
    """Compute the weighted final PD and its grade.

    Args:
        pd_a: raw PD from model A (non-null number).
        pd_b: raw PD from model B (non-null number).
        pd_t: model T probability, used as the weight of model A.

    Returns:
        A flat dict with:
        - pd_a: raw PD A clamped to [0, 1]
        - pd_b: raw PD B clamped to [0, 1]
        - pd_t: model T probability clamped to [0, 1]
        - pd_iso_a: isotonic-calibrated PD A
        - pd_iso_b: isotonic-calibrated PD B
        - final_pd: ``pd_iso_a * pd_t + pd_iso_b * (1 - pd_t)``
        - grade: grade from the cut-off table, or "" when none applies

    Raises:
        TypeError: if any input is ``None``.
        ValueError: if any input (or calibrated value) is NaN.
    """
    for name, value in (("pd_a", pd_a), ("pd_b", pd_b), ("pd_t", pd_t)):
        if value is None:
            raise TypeError(f"{name} must be a non-null number")

    _load_grade_table()
    _ensure_isotonic_models_loaded()

    weight = _clamp_probability(pd_t)
    pd_iso_a = _apply_isotonic("a", pd_a)
    pd_iso_b = _apply_isotonic("b", pd_b)

    final_pd = (pd_iso_a * weight) + (pd_iso_b * (1 - weight))

    grade = _determine_grade(final_pd)

    return {
        "pd_a": _clamp_probability(pd_a),
        "pd_b": _clamp_probability(pd_b),
        "pd_t": weight,
        "pd_iso_a": pd_iso_a,
        "pd_iso_b": pd_iso_b,
        "final_pd": final_pd,
        "grade": grade if grade is not None else "",
    }
+B1,0.1000000001,0.15 +B2,0.1500000001,0.3 +C1,0.3000000001,0.35 +C2,0.3500000001,1 diff --git a/isotonic_model_A.joblib b/isotonic_model_A.joblib new file mode 100644 index 0000000000000000000000000000000000000000..d917c3274c0cb740988e93c30ca27a99688b231b GIT binary patch literal 1319 zcmY*ZeNa?Y6nEuYX9ba=Ns}3kZ)JltQARkz4kl125(W`I-|jwm&vxH#`*wxRFQh;| zUL(*0Q;>Ga1%!kJTtEUr1T>&T0wiS6k0lw`hQ+)gk@8CTj&g-8gJ2{jsM z0&6TINTD1q zgi<-ilPJXs0+q{A6+@d-LR5^(*}$}!(T7EmLM6itD@$XyGHIzNnZ1l_hzZTv&V+`B z2ESm3aSB2pC?kd>6|QJQdmD@L^J_m@D;cn-_oa39o1&nH^0Y?L=-ry6qkxc z6iaH=L8(AY2zbW0h>Q@jDPriAp$4k{26`XLQJD-gkeknnC8ZPy#VAEFxHWA-isnQZ zxKlXFat`rKW^Jil#2%tK{6#Ie9lZ<{i>2|1M1soL#H^W%X*`dhVj_-|V59;Os1z8* zgxDwrCalT@Wj`)|7`{1yf$%U((d`GhuwJ~<;Ueb@SUA?(_>7)~Wdm99*X#0uu<;7a zJ5~T$LmGAbS~Xa#&GK71t%kyk6ET_n8u(`F?H2>OLa=Ufy&9GNEj-^)eZCGm4`Gek zu)F{*B+iPa!!J$}w@9w~vX_x_tcGJFAIJk!j0!==#m zZaXd*D+9l6(%rl5%VE5ae|X5d0{SP9dc5JK19SI=)|wx5(1y8;ek!g6r<@|k$M#k5 z`)Ynb{f#PEwRMG?%kgTsn{w}FYTzYU^LfzNvDfr~I3uIFX+8XvVY~kDqZ;7(Sj_|FBM*U@WmpySWw<0Gw55IdOLl-3SjQ^5|^MK>VOH2>cjq!Y5AaZaD2Zo)#> zXpvH*Uj|?x5jriWb?U*?mmK=VrhXAQ0;d}lGYv47 zWtY#t&^2foWcH`>=Z}%xYQ!X8YPf z+uM}cJlPJL5B>ILban^qTetmwo=+DLsuD+x-wjcxY#+a~vmbIC7V2k%%zuB#Axv)k Q=x8%9-WHx0qZIJ|2bZ2&aR2}S literal 0 HcmV?d00001 diff --git a/isotonic_model_B.joblib b/isotonic_model_B.joblib new file mode 100644 index 0000000000000000000000000000000000000000..ae9be55e88146f3ffe0b682e5c8c84e7fa276bef GIT binary patch literal 1351 zcmY*ZX;c$e6b3QK0heIWYHh4NDqw-&0xFAMEiII#5wQi1)iX{OG9$@MoJp*)8nKFC z9i_rht))V#jRgfIVT%$FgO#<4EX6G%1+5%ttECII9y$q2Pv4LC-SghP?|$EX_q~+K z*(Ht+_QCM!WQ7c+2q8|BYLdVuEHg85%#4%CDH%=UB*A8}OZKySS#O4?MU*(fitGlu zi!)qtLPE(<8YkpzIx~&bs1Y&=5tAB1O0&#li2_%#jN5-s2N{vE|qZEZ|S;m=`pbC`Y5;Huz4~J59Oe=t@2Y3!=Ky<%^Rs6}cOfh`9rFlt5yYA*jk9~k_NFBwYK&8gkqRlz zk$P!6DOTWOp*=66!X;dZSjJssqq^QkFM&c;Dw&PkSXEqTjaZ^UX`01mFjENWs0bT( z8bdkG(aTdgo1~;%5aSp&p#}3`Tv3IB{18zpQ59F1d80Y)@d&EG<%Cj3s1dPNEu-0J zzDDfCsT@~U9#VBrwWTN&e13I}@4S(PiwAhCGf zT76hK_-%M>OxRoj$6jtZR=uMVdgJ^f5c0k 
z@T$8I?R9bHZ?9Tmyo<6|j6Nm@W}5bYorJwU+8@{=%oFpnH37B+djs>qJTTvM%y0ZY zg)s#$jIo*U=0EoyoSbwDW?GB(<*D<)^L%hiXUZAy-`aO}O^6-}>-^h|35B4PNM7Wc z3qdjxoL1U*egcbvkMGG}T>`({cAC10GQhyK=SR)m1_*A{^NIqEP%)?r;OCc3px31M zxt*yMkh1?q;7-Z}it_H0Bx`~>9q%s=*PGyCyyO(^UIlLby#WOoW>A`Rk-1mRz&!mj zZFuUH2|SQk7RVf|1-)VS?_Ec(g7x8Y_dkOyP}aMf!Eo=oWO*xi<{S<$w%!8K!iVUQ!CUaoKvcw1K|5521a1p$?tq@T zscjkMoe;o(cF%IR3wS}h8$LaB2dW7tf$!4$;4pI3yKhksWX~VJPd+mCJt8}0l>H-Q Oc*}$