87 lines
4.2 KiB
Python
87 lines
4.2 KiB
Python
import logging
|
|
import pandas as pd
|
|
import math
|
|
import json
|
|
|
|
# Layout of every log line: timestamp, level, logger name, then the message.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s - %(message)s"

# Configure root logging at INFO level with the layout above.
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Logger used throughout this module, named after the module itself.
logger = logging.getLogger(__name__)
|
|
|
|
# Currently disabled: load category orders from a local JSON file.
# with open('C:/Users/abinisha/flowx/kiwi-blocks/sequence-2/fraud_v1_pre_processing/category_orders_train.json', 'r') as f:
#     category_orders = json.load(f)
|
|
|
|
def __main__(
    user_age: int,
    persona_entity_confidence_score: float,
    persona_selfie_similarity_score_right: float,
    persona_selfie_similarity_score_left: float,
    persona_hesitation_percentage: float,
    persona_hesitation_count: float,
    device_id_age_max: int,
    selfie_consistency_score_avg: float,
    device_consistency: int,
    selfie_consistency_score: float,
    global_fs_ls: int,
    inquiry_frequency: int,
    confidence_score_min: float,
    contract_date_fs_sub: int,
    browser_os: str,
    user_city_ip_match: int,
    device_id_age_avg: float,
    persona_distraction_events: float,
    sub_fs_ls: int,
    device_id_age_min: int,
    confidence_score_max: float,
    persona_phone_risk_score: float,
    ip_address_risk_level: str,
    login_frequency: float,
    suspect_score: int,
    confidence_score: float,
    name_consistency: int,
    ip_location_consistency: int,
) -> dict:
    """Normalise the Fraud V1 input features into a plain dictionary.

    Every argument becomes one entry of the result, keyed by the parameter
    name and kept in the order of the signature.

    * Columns declared ``'int'`` or ``'float'`` are passed through
      ``pd.to_numeric(..., errors='coerce')``; values that cannot be parsed
      as numbers come back as ``None``.
    * Columns declared ``'string'`` (``browser_os`` and
      ``ip_address_risk_level``) are converted to lower-case text. A missing
      value (``None``/NaN) stays missing and is returned as ``None`` rather
      than being stringified into the literal ``'none'``/``'nan'``.

    The processed dictionary is logged at INFO level before it is returned.

    Returns:
        dict: parameter name -> normalised value, with ``None`` for every
        missing or unparseable value.
    """
    # Declared kind of each feature; 'int' and 'float' are both handled by
    # the numeric coercion below, everything else is treated as text.
    dtypes = {
        'user_age': 'int',
        'persona_entity_confidence_score': 'float',
        'persona_selfie_similarity_score_right': 'float',
        'persona_selfie_similarity_score_left': 'float',
        'persona_hesitation_percentage': 'float',
        'persona_hesitation_count': 'float',
        'device_id_age_max': 'int',
        'selfie_consistency_score_avg': 'float',
        'device_consistency': 'int',
        'selfie_consistency_score': 'float',
        'global_fs_ls': 'int',
        'inquiry_frequency': 'int',
        'confidence_score_min': 'float',
        'contract_date_fs_sub': 'int',
        'browser_os': 'string',
        'user_city_ip_match': 'int',
        'device_id_age_avg': 'float',
        'persona_distraction_events': 'float',
        'sub_fs_ls': 'int',
        'device_id_age_min': 'int',
        'confidence_score_max': 'float',
        'persona_phone_risk_score': 'float',
        'ip_address_risk_level': 'string',
        'login_frequency': 'float',
        'suspect_score': 'int',
        'confidence_score': 'float',
        'name_consistency': 'int',
        'ip_location_consistency': 'int',
    }

    input_data = {
        "user_age": user_age,
        "persona_entity_confidence_score": persona_entity_confidence_score,
        "persona_selfie_similarity_score_right": persona_selfie_similarity_score_right,
        "persona_selfie_similarity_score_left": persona_selfie_similarity_score_left,
        "persona_hesitation_percentage": persona_hesitation_percentage,
        "persona_hesitation_count": persona_hesitation_count,
        "device_id_age_max": device_id_age_max,
        "selfie_consistency_score_avg": selfie_consistency_score_avg,
        "device_consistency": device_consistency,
        "selfie_consistency_score": selfie_consistency_score,
        "global_fs_ls": global_fs_ls,
        "inquiry_frequency": inquiry_frequency,
        "confidence_score_min": confidence_score_min,
        "contract_date_fs_sub": contract_date_fs_sub,
        "browser_os": browser_os,
        "user_city_ip_match": user_city_ip_match,
        "device_id_age_avg": device_id_age_avg,
        "persona_distraction_events": persona_distraction_events,
        "sub_fs_ls": sub_fs_ls,
        "device_id_age_min": device_id_age_min,
        "confidence_score_max": confidence_score_max,
        "persona_phone_risk_score": persona_phone_risk_score,
        "ip_address_risk_level": ip_address_risk_level,
        "login_frequency": login_frequency,
        "suspect_score": suspect_score,
        "confidence_score": confidence_score,
        "name_consistency": name_consistency,
        "ip_location_consistency": ip_location_consistency,
    }

    # A single-row frame; the row is only ever accessed by position.
    df = pd.DataFrame(input_data, index=[0])

    for column, dtype in dtypes.items():
        if dtype in ('int', 'float'):
            # Unparseable values become NaN here and None in the output.
            df[column] = pd.to_numeric(df[column], errors='coerce')
        else:
            raw = df[column]
            # astype(str) would turn a missing value into the text
            # 'none'/'nan'; mask those positions so they stay missing.
            df[column] = raw.astype(str).str.lower().where(raw.notna())

    # Replace every missing marker (NaN/None/NA) with None, value by value,
    # so the result does not depend on the dtype of the extracted row.
    output_data = {
        name: None if pd.isna(value) else value
        for name, value in df.iloc[0].to_dict().items()
    }

    logger.info("Fraud V1 Pre processed data: %s", output_data)

    return output_data
|
|
|