Compare commits
1 commit
main...pd-v3-proc
| Author | SHA1 | Date |
|---|---|---|
| | 90479dfe13 | |
README.md (12 lines changed)
@@ -1 +1,11 @@
-**Hello world!!!**
+# PD V3 Processing
+
+- **Inputs:** Treated feature dictionaries per model from pre-processing.
+- **Outputs:** Raw and isotonic PD scores for models A/B plus model T probability.
+- **Artifacts:** Model binaries located under `models/` (XGBoost + isotonic joblib files).
+- **Tests:** `python -m unittest sequence-3.pd_v3_processing.test_block`.
+- **Signature:** Sequence-3 convention: `__main__` must keep an explicit typed parameter list covering every input (int/float/str) and build the record from those args before scoring; keep aligned with the block schemas.
+
+## Schema notes
+
+- `request_schema.json` and `response_schema.json` for this block are frozen. They describe arrays of `{name, value}` dicts (no nested dict-of-dicts), so that structure must be preserved; do not switch these schemas to dict-of-dicts or object-of-dicts even as the block code evolves. Arrays-of-dicts are still allowed where schematically appropriate.
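**The frozen shape, concretely.** A minimal sketch of the array-of-`{name, value}` layout the schema note protects; the field names here are hypothetical examples, not the block's real schema fields:

```python
# Hypothetical payload in the frozen array-of-{name, value} layout.
# "PER201" / "G300S" are illustrative feature names only.
payload = [
    {"name": "PER201", "value": 1.0},
    {"name": "G300S", "value": "1.0"},
]

# The forbidden dict-of-dicts equivalent would look like:
# {"PER201": {"value": 1.0}, "G300S": {"value": "1.0"}}
```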
__init__.py (new file, 1 line)
@@ -0,0 +1 @@
+__all__ = ["__main__"]
block.py (435 lines changed)
@@ -1,21 +1,422 @@
-@flowx_block
-def example_function(request: dict) -> dict:
-    # Processing logic here...
-    return {
-        "meta_info": [
-            {
-                "name": "created_date",
-                "type": "string",
-                "value": "2024-11-05"
-            }
-        ],
-        "fields": [
-            {
-                "name": "",
-                "type": "",
-                "value": ""
-            }
-        ]
-    }
+import json
+import logging
+from functools import lru_cache
+from pathlib import Path
+from typing import Any, Dict, List, Tuple
+
+import joblib
+import numpy as np
+import pandas as pd
+import xgboost as xgb
+from pandas.api.types import (
+    is_bool_dtype,
+    is_categorical_dtype,
+    is_float_dtype,
+    is_integer_dtype,
+    is_object_dtype,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Paths & constants
+# ---------------------------------------------------------------------------
+
+try:
+    BASE_DIR = Path(__file__).resolve().parent
+except NameError:
+    # Fallback for environments where __file__ is not defined (e.g. some REPLs / notebooks)
+    BASE_DIR = Path.cwd()
+
+A_MODEL_PATH = BASE_DIR / "xgboost_model_A.joblib"
+A_CATEGORY_ORDERS_PATH = BASE_DIR / "category_orders_train_A.json"
+
+B_MODEL_PATH = BASE_DIR / "xgboost_model_B.joblib"
+B_CATEGORY_ORDERS_PATH = BASE_DIR / "category_orders_train_B.json"
+
+T_MODEL_PATH = BASE_DIR / "xgboost_model_T.joblib"
+T_CATEGORY_ORDERS_PATH = BASE_DIR / "category_orders_train_T.json"
+
+
+# ---------------------------------------------------------------------------
+# Loaders
+# ---------------------------------------------------------------------------
+
+def _load_category_orders(path: Path) -> Dict[str, Any]:
+    """Load category orders JSON from disk."""
+    with open(path, "r") as f:
+        return json.load(f)
+
+
+@lru_cache(maxsize=1)
+def _load_a_model():
+    """Load and cache model A."""
+    logger.info("Loading model A from %s", A_MODEL_PATH)
+    return joblib.load(A_MODEL_PATH)
+
+
+@lru_cache(maxsize=1)
+def _load_b_model():
+    """Load and cache model B."""
+    logger.info("Loading model B from %s", B_MODEL_PATH)
+    return joblib.load(B_MODEL_PATH)
+
+
+@lru_cache(maxsize=1)
+def _load_t_model():
+    """Load and cache model T."""
+    logger.info("Loading model T from %s", T_MODEL_PATH)
+    return joblib.load(T_MODEL_PATH)
+
+
+@lru_cache(maxsize=None)
+def _load_category_orders_cached(path: Path) -> Dict[str, Any]:
+    """Cache category orders per path to avoid disk I/O on each scoring."""
+    logger.info("Loading category orders from %s", path)
+    return _load_category_orders(path)
+
+
+def _get_expected_features(model: Any, df: pd.DataFrame) -> List[str]:
+    """
+    Get the expected feature names from the model.
+
+    If the model has no 'feature_names' attribute, fall back to df columns.
+    This is a defensive measure; ideally, feature names should always
+    be stored with the model.
+    """
+    feature_names = getattr(model, "feature_names", None)
+    if feature_names is None:
+        logger.warning(
+            "Model has no attribute 'feature_names'; using DataFrame columns order."
+        )
+        feature_names = list(df.columns)
+    return list(feature_names)
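**Caching behavior.** Each `@lru_cache(maxsize=1)` loader reads its joblib file once per process; subsequent calls return the same cached object. A minimal sketch, assuming the model artifacts sit next to `block.py`:

```python
m1 = _load_a_model()  # first call: joblib.load() hits the disk
m2 = _load_a_model()  # second call: served from the lru_cache
assert m1 is m2       # same cached object, no re-load

# _load_category_orders_cached is keyed by Path (hashable), so each of
# the three category-orders files is likewise read at most once.
```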
+
+
+# ---------------------------------------------------------------------------
+# Preprocessing helpers
+# ---------------------------------------------------------------------------
+
+MISSING_SENTINELS = [None, "", "null", np.nan, "nan", " "]
+
+
+def _to_string_category(series: pd.Series) -> pd.Series:
+    """
+    Force a categorical series whose categories are strings (not floats),
+    backed by NumPy object dtype (not pandas StringDtype) for XGBoost
+    compatibility.
+    """
+    s = series.copy()
+    s = s.replace(MISSING_SENTINELS, np.nan)
+    # Use classic Python strings (object dtype), not pandas' StringDtype,
+    # so that XGBoost's numpy-based dtype checks work correctly.
+    s = s.astype(str)
+    return s.astype("category")
+
+
+def _sanitize_expected_feature_dtypes(
+    df: pd.DataFrame,
+    expected_features: List[str],
+    categorical_feature_names: List[str],
+) -> pd.DataFrame:
+    """
+    XGBoost DMatrix does NOT allow object dtype.
+
+    For each expected feature column:
+    - If it is a known categorical (listed in the training-time category
+      orders), leave it untouched so its category codes match training.
+    - If dtype is numeric or bool, keep as-is.
+    - Anything else (object, string, unexpected categorical) is coerced
+      to numeric, with unparseable values becoming NaN.
+
+    This mirrors the safety checks needed to satisfy the
+    XGBoost 3.x pandas backend (`enable_categorical=True`).
+    """
+    df = df.copy()
+    categorical_set = set(categorical_feature_names)
+
+    for col in expected_features:
+        if col not in df.columns:
+            # Ensure column exists so downstream checks don't fail here.
+            df[col] = np.nan
+
+        dtype = df[col].dtype
+
+        # If this feature is known to be categorical from training-time
+        # category_orders, assume _prepare_* already produced a proper
+        # pandas Categorical with the training categories and leave it
+        # untouched so that category codes match training.
+        if col in categorical_set:
+            continue
+
+        # For non-categorical features, XGBoost expects numeric or bool.
+        if is_bool_dtype(dtype) or is_integer_dtype(dtype) or is_float_dtype(dtype):
+            continue
+
+        # Anything else (object, string, unexpected categorical) -> numeric coercion.
+        df[col] = pd.to_numeric(df[col], errors="coerce")
+
+    return df
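**What sanitization does in practice.** A small sketch, assuming `block.py`'s helpers are importable; `FI36SD` and `CV25` are feature names borrowed from the test payload further down:

```python
import pandas as pd

df = pd.DataFrame({"FI36SD": ["999", "oops"], "CV25": [0.0, 1.0]})
clean = _sanitize_expected_feature_dtypes(df, ["FI36SD", "CV25"], [])

print(clean["FI36SD"].tolist())  # [999.0, nan] -- object column coerced to numeric
print(clean["CV25"].dtype)       # float64 -- already numeric, left as-is
```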
+
+
+def _prepare_a(df: pd.DataFrame, category_orders: Dict[str, List[Any]]) -> pd.DataFrame:
+    """
+    Prepare features for model A.
+
+    For each column with category orders:
+    - If all category labels are numeric-like, map incoming values onto
+      the canonical string labels used during training (e.g. -4 -> "-4.0")
+      and build an ordered categorical over those labels.
+    - Otherwise, treat values as plain string categories.
+    """
+    df = df.copy()
+    for col, raw_categories in category_orders.items():
+        if col not in df.columns:
+            df[col] = np.nan
+
+        # Normalize missing-like representations.
+        df[col] = df[col].replace(MISSING_SENTINELS, np.nan)
+
+        # Detect whether category labels are numeric-like and, if so,
+        # map numeric values onto the canonical string labels used
+        # during training (e.g. -4 -> "-4.0").
+        numeric_like = True
+        numeric_label_map: Dict[float, str] = {}
+        for v in raw_categories:
+            try:
+                numeric_label_map[float(v)] = str(v)
+            except (TypeError, ValueError):
+                numeric_like = False
+                break
+
+        if numeric_like:
+            def _map_value(val: Any) -> Any:
+                if pd.isna(val):
+                    return np.nan
+                try:
+                    key = float(val)
+                except (TypeError, ValueError):
+                    return np.nan
+                return numeric_label_map.get(key, np.nan)
+
+            df[col] = df[col].map(_map_value)
+            df[col] = pd.Categorical(df[col], categories=raw_categories, ordered=True)
+        else:
+            # Pure string categories: coerce to plain strings
+            df[col] = df[col].astype(str)
+            df[col] = pd.Categorical(df[col], categories=raw_categories, ordered=True)
+
+    return df
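**Numeric-label mapping, concretely.** A minimal sketch assuming `block.py`'s helpers are importable; the `G300S` labels are the ones committed in `category_orders_train_A.json` below:

```python
import pandas as pd

orders = {"G300S": ["-1.0", "-2.0", "-4.0", "0.0", "1.0", "10.0",
                    "2.0", "3.0", "4.0", "5.0", "9.0"]}
df = pd.DataFrame({"G300S": [1, -4, "bogus"]})
out = _prepare_a(df, orders)

# 1 -> "1.0" and -4 -> "-4.0"; "bogus" cannot be float()ed and becomes NaN.
print(out["G300S"].tolist())        # ['1.0', '-4.0', nan]
print(out["G300S"].cat.categories)  # the training label order, preserved
```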
+
+
+def _prepare_with_lower(df: pd.DataFrame, category_orders: Dict[str, List[Any]]) -> pd.DataFrame:
+    """
+    Shared preparation logic for models B and T, whose categorical
+    values are lowercased strings.
+    """
+    df = df.copy()
+    for col, raw_categories in category_orders.items():
+        if col not in df.columns:
+            df[col] = np.nan
+
+        # Normalize missing-like representations.
+        df[col] = df[col].replace(MISSING_SENTINELS, np.nan)
+
+        # Detect whether category labels are numeric-like and, if so,
+        # map numeric values onto the canonical string labels used
+        # during training (e.g. -4 -> "-4.0"). Otherwise, treat
+        # them as lowercased string categories.
+        numeric_like = True
+        numeric_label_map: Dict[float, str] = {}
+        for v in raw_categories:
+            try:
+                numeric_label_map[float(v)] = str(v)
+            except (TypeError, ValueError):
+                numeric_like = False
+                break
+
+        if numeric_like:
+            def _map_value(val: Any) -> Any:
+                if pd.isna(val):
+                    return np.nan
+                try:
+                    key = float(val)
+                except (TypeError, ValueError):
+                    return np.nan
+                return numeric_label_map.get(key, np.nan)
+
+            df[col] = df[col].map(_map_value)
+            df[col] = pd.Categorical(df[col], categories=raw_categories, ordered=True)
+        else:
+            # String categories: lower-case string representation
+            df[col] = df[col].astype(str).str.lower()
+            df[col] = pd.Categorical(df[col], categories=raw_categories, ordered=True)
+
+    return df
+
+
+def _prepare_b(df: pd.DataFrame, category_orders: Dict[str, List[Any]]) -> pd.DataFrame:
+    """Prepare features for model B (lowercased categorical values)."""
+    return _prepare_with_lower(df, category_orders)
+
+
+def _prepare_t(df: pd.DataFrame, category_orders: Dict[str, List[Any]]) -> pd.DataFrame:
+    """Prepare features for model T (lowercased categorical values)."""
+    return _prepare_with_lower(df, category_orders)
+
+
+# ---------------------------------------------------------------------------
+# Per-model processing functions
+# ---------------------------------------------------------------------------
+
+def processing_a(input_data: pd.DataFrame) -> float:
+    """Run model A on input_data and return the first prediction as float."""
+    df = pd.DataFrame(input_data)
+    if df.empty:
+        raise ValueError("Input DataFrame for model A is empty.")
+
+    model = _load_a_model()
+    category_orders = _load_category_orders_cached(A_CATEGORY_ORDERS_PATH)
+    df = _prepare_a(df, category_orders)
+
+    expected_features = _get_expected_features(model, df)
+    df = _sanitize_expected_feature_dtypes(df, expected_features, list(category_orders.keys()))
+    # Ensure all expected features exist in df
+    missing_features = set(expected_features) - set(df.columns)
+    if missing_features:
+        raise KeyError(
+            f"Missing expected features for model A: {sorted(missing_features)}"
+        )
+
+    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)
+    predictions = model.predict(dmatrix)
+
+    if len(predictions) == 0:
+        raise RuntimeError("Model A returned no predictions.")
+
+    return float(predictions[0])
+
+
+def processing_b(input_data: pd.DataFrame) -> float:
+    """Run model B on input_data and return the first prediction as float."""
+    df = pd.DataFrame(input_data)
+    if df.empty:
+        raise ValueError("Input DataFrame for model B is empty.")
+
+    model = _load_b_model()
+    category_orders = _load_category_orders_cached(B_CATEGORY_ORDERS_PATH)
+    df = _prepare_b(df, category_orders)
+
+    expected_features = _get_expected_features(model, df)
+    df = _sanitize_expected_feature_dtypes(df, expected_features, list(category_orders.keys()))
+    missing_features = set(expected_features) - set(df.columns)
+    if missing_features:
+        raise KeyError(
+            f"Missing expected features for model B: {sorted(missing_features)}"
+        )
+
+    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)
+    predictions = model.predict(dmatrix)
+
+    if len(predictions) == 0:
+        raise RuntimeError("Model B returned no predictions.")
+
+    return float(predictions[0])
+
+
+def processing_t(input_data: pd.DataFrame) -> float:
+    """Run model T on input_data and return the first prediction as float."""
+    df = pd.DataFrame(input_data)
+    if df.empty:
+        raise ValueError("Input DataFrame for model T is empty.")
+
+    model = _load_t_model()
+    category_orders = _load_category_orders_cached(T_CATEGORY_ORDERS_PATH)
+    df = _prepare_t(df, category_orders)
+
+    expected_features = _get_expected_features(model, df)
+    df = _sanitize_expected_feature_dtypes(df, expected_features, list(category_orders.keys()))
+    missing_features = set(expected_features) - set(df.columns)
+    if missing_features:
+        raise KeyError(
+            f"Missing expected features for model T: {sorted(missing_features)}"
+        )
+
+    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)
+    predictions = model.predict(dmatrix)
+
+    if len(predictions) == 0:
+        raise RuntimeError("Model T returned no predictions.")
+
+    return float(predictions[0])
+
+
+def processing_all(
+    df_a: pd.DataFrame,
+    df_b: pd.DataFrame,
+    df_t: pd.DataFrame,
+) -> Tuple[float, float, float]:
+    """Convenience function to run all three models and return their predictions."""
+    return (
+        processing_a(df_a),
+        processing_b(df_b),
+        processing_t(df_t),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main entrypoint for batch-style input
+# ---------------------------------------------------------------------------
+
+def __main__(results: List[Dict[str, Any]]) -> Dict[str, float]:
+    """
+    Main entrypoint for processing a list of results dicts.
+
+    Expected shape of each element in `results`:
+        {
+            "model_a_features": { ... feature_name: value ... },
+            "model_b_features": { ... feature_name: value ... },
+            "model_t_features": { ... feature_name: value ... },
+        }
+
+    Returns a dict with keys "pd_a", "pd_b" and "pd_t".
+    """
+    logger.info("Data received in processing block: %s", results)
+
+    df = pd.DataFrame(results)
+    if df.empty:
+        raise ValueError("Input results list is empty.")
+
+    if not {"model_a_features", "model_b_features", "model_t_features"}.issubset(df.columns):
+        missing = {
+            "model_a_features",
+            "model_b_features",
+            "model_t_features",
+        } - set(df.columns)
+        raise KeyError(
+            f"Missing expected keys in results: {sorted(missing)}"
+        )
+
+    # Each cell of these columns is expected to be a dict-like object
+    df_a = pd.DataFrame(list(df["model_a_features"]))
+    df_b = pd.DataFrame(list(df["model_b_features"]))
+    df_t = pd.DataFrame(list(df["model_t_features"]))
+
+    pd_a, pd_b, pd_t = processing_all(df_a, df_b, df_t)
+    return {
+        "pd_a": pd_a,
+        "pd_b": pd_b,
+        "pd_t": pd_t,
+    }
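**Invocation sketch.** How a caller drives the block end to end; the payload below is abbreviated and hypothetical (a real call must supply every feature each model expects, as in `test_block.py` further down):

```python
# Abbreviated, hypothetical payload -- real feature dicts are much larger.
results = [{
    "model_a_features": {"PER201": 1.0, "G300S": 1},            # ...plus ~60 more
    "model_b_features": {"PER201": 1.0, "RVLR14": "RTRRRRRR"},  # ...plus ~90 more
    "model_t_features": {"PER201": 1.0, "TRV06": 8},            # ...plus ~30 more
}]

scores = __main__(results)
# scores == {"pd_a": <float>, "pd_b": <float>, "pd_t": <float>},
# matching response_schema.json below.
```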
category_orders_train_A.json (new file, 15 lines)
@@ -0,0 +1,15 @@
+{
+  "G300S": [
+    "-1.0",
+    "-2.0",
+    "-4.0",
+    "0.0",
+    "1.0",
+    "10.0",
+    "2.0",
+    "3.0",
+    "4.0",
+    "5.0",
+    "9.0"
+  ]
+}
category_orders_train_B.json (new file, 2728 lines)
File diff suppressed because it is too large

category_orders_train_T.json (new file, 47386 lines)
File diff suppressed because it is too large
request_schema.json
@@ -1 +1,11 @@
-{}
+{
+  "$schema": "http://json-schema.org/draft-07/schema",
+  "type": "object",
+  "properties": {
+    "results": {
+      "type": ["array", "null"],
+      "items": {"type": "object"}
+    }
+  },
+  "required": []
+}
requirements.txt
@@ -1 +1,4 @@
-{}
+joblib==1.5.2
+numpy==2.2.6
+pandas==2.2.3
+xgboost==3.1.1
response_schema.json
@@ -1 +1,11 @@
-{}
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "pd_a": { "type": "number" },
+    "pd_b": { "type": "number" },
+    "pd_t": { "type": "number" }
+  },
+  "required": ["pd_a", "pd_b", "pd_t"],
+  "additionalProperties": false
+}
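**Checking output against the contract.** A minimal sketch using the third-party `jsonschema` package (an assumed extra dependency; it is not pinned in requirements.txt):

```python
import json
from jsonschema import validate, ValidationError  # assumed extra dependency

with open("response_schema.json") as f:
    schema = json.load(f)

validate({"pd_a": 0.12, "pd_b": 0.08, "pd_t": 0.55}, schema)  # passes

try:
    validate({"pd_a": 0.12}, schema)
except ValidationError as err:
    print(err.message)  # "'pd_b' is a required property"
```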
test_block.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+import unittest
+
+from block import __main__
+# from reference import __main__
+
+
+data = [{'model_a_features': {'AEPMAG05': 297, 'RET201': None, 'PER201': 1.0, 'PER202': 1.0, 'PER222': 1.0, 'PER225': 1.51, 'PER235': 1949, 'CTM18': None, 'SC20S': None, 'AT36SD': 6, 'FI36SD': 999, 'G250BD': 1, 'G250CD': 1, 'US36SD': 999, 'CV13': 3.0, 'CV25': 0.0, 'CV26': 3.0, 'AT01S': 36, 'AT104S': 8, 'FI02S': 1, 'FI20S': 119, 'FI35S': 1225, 'G051S': 3, 'G205S': 611, 'G210S': None, 'G218A': 0, 'G225S': 606, 'G230S': None, 'G234S': None, 'G300S': 1, 'IN02S': 1, 'IN12S': 1, 'OF20S': None, 'RT20S': 114, 'INAP01': 175, 'G106S': 220, 'US02S': 1, 'US20S': 119, 'US24S': 1, 'US28S': 1449, 'US32S': 1225, 'US35S': 1225, 'US36S': None, 'SE20S': None, 'US51A': 12, 'G205B': 611, 'INST_TRD': 28, 'RTL_TRD': 8, 'AGG402': 210, 'AGG403': 205, 'AGG423': 1344, 'AGG424': 484, 'AGG903': 1, 'TRV03': 3, 'TRV04': 5, 'BALMAG01': 188, 'score_results': 603, 'PER201_unk': 0, 'G225S_unk': 0, 'SC20S_unk': 1, 'RET201_unk': 1, 'US24S_unk': 0}, 'model_b_features': {'UTLMAG01': 112, 'AEPMAG04': 286, 'PER201': 1.0, 'PER203': 1.31, 'PER222': 1.0, 'PER223': 1.31, 'PER224': 1.15, 'PER225': 1.51, 'PER235': 1949, 'CTM23': None, 'CT321': 65, 'CTC20': None, 'CTA17': 24, 'CTA18': 5, 'SC21S': None, 'SCC92': None, 'SCBALM01': None, 'AT36SD': 6, 'FI36SD': 999, 'RE36SD': 6, 'SE36SD': None, 'US36SD': 999, 'LQA232YR': 100.0, 'LQR325YR': -255.0, 'RLE902': None, 'CV25': 0.0, 'CV26': 3.0, 'RVDEXQ2': 9, 'AT01S': 36, 'AT104S': 8, 'AU20S': None, 'BI21S': None, 'BR33S': 1148, 'CO06S': None, 'FI02S': 1, 'FI03S': 1, 'FI20S': 119, 'FI32S': 1225, 'FI33S': 1225, 'FI34S': 85, 'FI35S': 1225, 'FI101S': 1225, 'FR21S': None, 'FR32S': None, 'G020S': 11, 'G102S': None, 'G205S': 611, 'G210S': None, 'G213A': None, 'G225S': 606, 'G234S': None, 'G301S': 1, 'G990S': None, 'IN02S': 1, 'IN12S': 1, 'MT21S': None, 'OF09S': None, 'OF21S': None, 'OF29S': None, 'OF35S': None, 'RE32S': 2384, 'RT36S': 6, 'ST01S': 0, 'INAP01': 175, 'G106S': 220, 'S204S': None, 'US02S': 1, 'US03S': 1, 'US12S': 1, 'US20S': 119, 'US24S': 1, 'US30S': 100.0, 'US34S': 85, 'SE20S': None, 'SE21S': None, 'SE34S': None, 'SE36S': None, 'JT20S': None, 'JT33S': None, 'JT70S': None, 'G404S': None, 'G405S': None, 'G406S': None, 'G407S': None, 'G416S': None, 'G417S': None, 'US51A': 12, 'INST_TRD': 28, 'NOMT_TRD': 36, 'AGG512': 1169, 'AGG516': 1193, 'AGG902': 2, 'AGG903': 1, 'TRV03': 3, 'TRV04': 5, 'TRV06': 8, 'BALMAG01': 188, 'RVLR14': 'RTRRRRRR', 'PAYMNT06': 1.31, 'PAYMNT07': 1.0, 'score_results': 603}, 'model_t_features': {'PDMAG01': 310, 'AEPMAG05': 297, 'AUT201': None, 'PER201': 1.0, 'PER203': 1.31, 'PER204': 1.15, 'PER205': 1.48, 'PER223': 1.31, 'PER225': 1.51, 'PER253': 54, 'CTA17': 24, 'SE21CD': None, 'RLE907': None, 'CV26': 3.0, 'AT35B': 1037, 'FI28S': 1449, 'FI32S': 1225, 'INAP01': 175, 'US01S': 26, 'US28S': 1449, 'US34S': 85, 'US101S': 1225, 'SE02S': None, 'SE06S': None, 'SE09S': None, 'SE20S': None, 'TRV06': 8, 'TRV10': 12, 'PAYMNT06': 1.31, 'AEPMAG05_unk': 0, 'PER201_unk': 0}}]
+
+
+class TestBlock(unittest.TestCase):
+    def test_main_returns_scores(self):
+        block_result = __main__(data)
+        print(block_result)
+        self.assertIsInstance(block_result, dict)
+        self.assertIn("pd_a", block_result)
+        self.assertIn("pd_b", block_result)
+        self.assertIn("pd_t", block_result)
+
+
+if __name__ == "__main__":  # pragma: no cover
+    unittest.main()
xgboost_model_A.joblib (new file, binary)
Binary file not shown.

xgboost_model_B.joblib (new file, binary)
Binary file not shown.

xgboost_model_T.joblib (new file, binary)
Binary file not shown.