import functools
import logging

import joblib
import pandas as pd
import xgboost as xgb

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Location of the serialized model, relative to the process working directory.
MODEL_PATH = "./xgboost_model.joblib"


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the serialized model from MODEL_PATH once and reuse it afterwards.

    A failed load is logged and re-raised; lru_cache does not cache exceptions,
    so the next call retries the load.
    """
    try:
        # NOTE: joblib.load unpickles the file; only load a trusted artifact.
        return joblib.load(MODEL_PATH)
    except Exception:
        logger.exception("An unexpected error occurred while loading the model.")
        raise


def _nan_if_none(value):
    """Return NaN for a null input so its DataFrame column keeps a numeric dtype."""
    return float("nan") if value is None else value


def __main__(
    pti: float,
    score_results: float,
    BALMAG01: float,
    revolving_amount_monthly_payment: float,
    closed_with_balance_amount_current_balance: float,
    AT31S: int,
    AT20S: int,
    BC21S: int,
    record_counts_revolving_trade_count: int,
    record_counts_total_trade_count: int,
    PAYMNT10: float,
    AGG102: float,
    total_amount_high_credit: float,
    revolving_amount_current_balance: float,
    total_amount_current_balance: float,
    REV83: float,
    revolving_amount_high_credit: float,
    closed_with_balance_amount_monthly_payment: float,
    revolving_amount_percent_available_credit: float,
    AGG101: float,
    revolving_amount_credit_limit: float,
    AT09S: int,
    US01S: int,
) -> dict:
    """Score one request with the stored XGBoost model.

    Every argument is a scalar model feature and is placed in a one-row
    DataFrame under its own name. A ``has_mortgage`` column is always added
    with no value.

    Returns:
        ``{"probability": <float>}`` where the value is the first element of
        ``model.predict`` for the one-row input.

    Raises:
        Exception: whatever ``joblib.load`` raises when the model cannot be
            loaded (logged, then re-raised unchanged).
    """
    numeric_inputs = {
        "pti": pti,
        "score_results": score_results,
        "BALMAG01": BALMAG01,
        "revolving_amount_monthly_payment": revolving_amount_monthly_payment,
        "closed_with_balance_amount_current_balance": closed_with_balance_amount_current_balance,
        "AT31S": AT31S,
        "AT20S": AT20S,
        "BC21S": BC21S,
        "record_counts_revolving_trade_count": record_counts_revolving_trade_count,
        "record_counts_total_trade_count": record_counts_total_trade_count,
        "PAYMNT10": PAYMNT10,
        "AGG102": AGG102,
        "total_amount_high_credit": total_amount_high_credit,
        "revolving_amount_current_balance": revolving_amount_current_balance,
        "total_amount_current_balance": total_amount_current_balance,
        "REV83": REV83,
        "revolving_amount_high_credit": revolving_amount_high_credit,
        "closed_with_balance_amount_monthly_payment": closed_with_balance_amount_monthly_payment,
        "revolving_amount_percent_available_credit": revolving_amount_percent_available_credit,
        "AGG101": AGG101,
        "revolving_amount_credit_limit": revolving_amount_credit_limit,
        "AT09S": AT09S,
        "US01S": US01S,
    }
    # A None here would make the column dtype `object`, which the loop below
    # re-encodes as an (empty) categorical; NaN keeps the column numeric and
    # is still read by XGBoost as a missing value.
    input_data = {name: _nan_if_none(value) for name, value in numeric_inputs.items()}
    # Not supplied by the caller; stays None so it is encoded as a categorical
    # column with no observed category.
    input_data["has_mortgage"] = None

    model = _load_model()
    df_pre_processed = pd.DataFrame(input_data, index=[0])

    # Any feature the model expects but the request does not provide is added
    # as a missing value.
    expected_features = model.feature_names
    for feature in expected_features:
        if feature not in df_pre_processed.columns:
            df_pre_processed[feature] = None

    # Convert object columns to categorical
    for col in df_pre_processed.columns:
        if df_pre_processed[col].dtype == "object":
            df_pre_processed[col] = pd.Categorical(df_pre_processed[col])

    # Select (and order) exactly the columns the model was trained with.
    dmatrix = xgb.DMatrix(df_pre_processed[expected_features], enable_categorical=True)

    prediction = model.predict(dmatrix)[0]

    logger.info("PD V2 Predicted Score: %s", prediction)

    return {"probability": float(prediction)}
["number", "null"], + "description": "Aggregate non-mortgage balances for month 2" + }, + "total_amount_high_credit": { + "type": ["number", "null"], + "description": "The highest credit amount extended across all credit accounts" + }, + "revolving_amount_current_balance": { + "type": ["number", "null"], + "description": "The current owed balance on revolving credit accounts" + }, + "total_amount_current_balance": { + "type": ["number", "null"], + "description": "The total current balance across all credit accounts" + }, + "REV83": { + "type": ["number", "null"], + "description": "Months since a revolving account last exceeded 75% utilization" + }, + "revolving_amount_high_credit": { + "type": ["number", "null"], + "description": "The highest credit amount that has been extended to the borrower in revolving credit accounts" + }, + "closed_with_balance_amount_monthly_payment": { + "type": ["number", "null"], + "description": "The monthly payment amount for closed credit accounts (loans)" + }, + "revolving_amount_percent_available_credit": { + "type": ["number", "null"], + "description": "The percentage of available credit that has been utilized in revolving credit accounts" + }, + "AGG101": { + "type": ["number", "null"], + "description": "Aggregate non-mortgage balances for month 1" + }, + "revolving_amount_credit_limit": { + "type": ["number", "null"], + "description": "The total credit limit on revolving credit accounts" + }, + "AT09S": { + "type": ["integer", "null"], + "description": "Number of trades opened in past 24 months" + }, + "US01S": { + "type": ["integer", "null"], + "description": "Number of unsecured installment trades" + } + }, + "required": [] +} diff --git a/requirements.txt b/requirements.txt index 0967ef4..c4c4de5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,5 @@ -{} +jsonschema==4.23.0 +xgboost==1.7.5 +joblib==1.3.2 +pandas==2.2.2 +numpy==1.23.5 \ No newline at end of file diff --git a/response_schema.json 
import unittest

from block import __main__

# Sample request taken verbatim from the original regression test.
SAMPLE_REQUEST = {
    "pti": 0.3277136364,
    "score_results": 600.0,
    "BALMAG01": 196.0,
    "revolving_amount_monthly_payment": 56.0,
    "closed_with_balance_amount_current_balance": 8411.0,
    "AT31S": 71.0,
    "AT20S": 166.0,
    "BC21S": 4.0,
    "record_counts_revolving_trade_count": 9.0,
    "record_counts_total_trade_count": 18.0,
    "PAYMNT10": 4.0,
    "AGG102": 24994.0,
    "total_amount_high_credit": 53807.0,
    "revolving_amount_current_balance": 1635.0,
    "total_amount_current_balance": 38353.0,
    "REV83": 0.0,
    "revolving_amount_high_credit": 1720.0,
    "closed_with_balance_amount_monthly_payment": 0.0,
    "revolving_amount_percent_available_credit": 18.0,
    "AGG101": 11043.0,
    "revolving_amount_credit_limit": 2000.0,
    "AT09S": 4.0,
    "US01S": 0.0,
}

# Probability the original test expects for SAMPLE_REQUEST.
EXPECTED_PROBABILITY = 0.33663413


class TestBlock(unittest.TestCase):
    """Regression test for the scoring entry point in ``block.py``."""

    def test_main_success(self):
        """The sample request yields the recorded probability as a plain float."""
        result = __main__(**SAMPLE_REQUEST)

        self.assertIsInstance(result["probability"], float)
        # The expected value is printed to 8 decimals; 6 places leaves room
        # for last-digit rounding differences in the model output.
        self.assertAlmostEqual(result["probability"], EXPECTED_PROBABILITY, places=6)


if __name__ == "__main__":
    unittest.main()