2025-02-05 19:13:43 +00:00
|
|
|
import logging
|
|
|
|
|
import joblib
|
|
|
|
|
import xgboost as xgb
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import json
|
|
|
|
|
import math
|
|
|
|
|
|
|
|
|
|
# Logging setup: INFO and above, rendered as
# "<timestamp> [<LEVEL>] <logger name> - <message>".
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
# Category vocabularies captured at training time, keyed by column name.
# The path is relative to the process working directory.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform default encoding (which is not UTF-8 on many Windows setups).
with open('./category_orders_train.json', 'r', encoding='utf-8') as f:
    category_orders = json.load(f)
|
|
|
|
|
|
|
|
|
|
def _normalize_category(value) -> str:
    """Lower-case one categorical value, mapping missing markers to "none".

    ``None``, float NaN, the empty string and the literal "null" (any case)
    all become "none"; anything else is returned as its lower-cased string.
    """
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return "none"
    text = str(value).lower()
    return "none" if text in ("", "null") else text


def __main__(user_age: int, persona_entity_confidence_score: float, persona_selfie_similarity_score_right: float,
             persona_selfie_similarity_score_left: float, persona_hesitation_percentage: float,
             persona_hesitation_count: float, device_id_age_max: int, selfie_consistency_score_avg: float,
             device_consistency: int, selfie_consistency_score: float, global_fs_ls: int, inquiry_frequency: int,
             confidence_score_min: float, contract_date_fs_sub: int, browser_os: str, user_city_ip_match: int,
             device_id_age_avg: float, persona_distraction_events: float, sub_fs_ls: int, device_id_age_min: int,
             confidence_score_max: float, persona_phone_risk_score: float, ip_address_risk_level: str,
             login_frequency: float, suspect_score: int, confidence_score: float, name_consistency: int,
             ip_location_consistency: int) -> dict:
    """Score a single record with the Fraud V1 XGBoost model.

    Every argument is one model feature. The arguments are assembled into a
    one-row DataFrame, the two categorical features (``browser_os`` and
    ``ip_address_risk_level``) are normalised and encoded against the
    module-level ``category_orders`` vocabularies, the remaining columns are
    coerced to numeric where possible, and the row is passed to the model
    loaded from ``./xgboost_model.joblib``.

    Returns:
        ``{'probability': <float>}`` where the value is the first element of
        the model's prediction output for the row.

    Raises:
        KeyError: if the model expects a feature name that is not one of the
            arguments above.
        Any exception raised by ``joblib.load`` or by xgboost while building
        the DMatrix or predicting is propagated unchanged.
    """
    input_data = {
        "user_age": user_age,
        "persona_entity_confidence_score": persona_entity_confidence_score,
        "persona_selfie_similarity_score_right": persona_selfie_similarity_score_right,
        "persona_selfie_similarity_score_left": persona_selfie_similarity_score_left,
        "persona_hesitation_percentage": persona_hesitation_percentage,
        "persona_hesitation_count": persona_hesitation_count,
        "device_id_age_max": device_id_age_max,
        "selfie_consistency_score_avg": selfie_consistency_score_avg,
        "device_consistency": device_consistency,
        "selfie_consistency_score": selfie_consistency_score,
        "global_fs_ls": global_fs_ls,
        "inquiry_frequency": inquiry_frequency,
        "confidence_score_min": confidence_score_min,
        "contract_date_fs_sub": contract_date_fs_sub,
        "browser_os": browser_os,
        "user_city_ip_match": user_city_ip_match,
        "device_id_age_avg": device_id_age_avg,
        "persona_distraction_events": persona_distraction_events,
        "sub_fs_ls": sub_fs_ls,
        "device_id_age_min": device_id_age_min,
        "confidence_score_max": confidence_score_max,
        "persona_phone_risk_score": persona_phone_risk_score,
        "ip_address_risk_level": ip_address_risk_level,
        "login_frequency": login_frequency,
        "suspect_score": suspect_score,
        "confidence_score": confidence_score,
        "name_consistency": name_consistency,
        "ip_location_consistency": ip_location_consistency,
    }

    # Load the model. joblib.load unpickles the file, so the artifact must
    # come from a trusted source.
    # NOTE(review): the model is re-read from disk on every call; consider
    # caching it if the host keeps this module loaded between invocations.
    model = joblib.load("./xgboost_model.joblib")

    # One row; the row label itself is irrelevant to the model.
    df = pd.DataFrame(input_data, index=[0])

    # Encode categorical columns against the training-time vocabularies.
    # Normalising value-by-value (rather than through the ``.str`` accessor)
    # keeps this working when a value arrives as float NaN instead of a string.
    categorical_columns = ['browser_os', 'ip_address_risk_level']
    for col in categorical_columns:
        normalized = df[col].map(_normalize_category)
        # Values outside the known categories become NaN in the Categorical.
        df[col] = pd.Categorical(normalized, categories=category_orders.get(col, []))

    # Coerce every other column to a numeric dtype where possible. A column
    # that cannot be converted is left unchanged, which is what the deprecated
    # ``errors='ignore'`` option used to do.
    for col in [name for name in df.columns if name not in categorical_columns]:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            logger.warning("Column %r could not be converted to numeric; leaving it unchanged", col)

    # Select the columns in the order the model reports for its features.
    model_feature_names = model.feature_names

    dmatrix = xgb.DMatrix(df[model_feature_names], enable_categorical=True)

    # Single input row, so take the first prediction.
    prediction = model.predict(dmatrix)[0]

    logger.info("Fraud V1 Predicted Score: %s", prediction)

    return {'probability': float(prediction)}
|