ETD Pre-preprocessing block
This commit is contained in:
parent
70590db6b5
commit
d20e15fd4d
12
README.md
12
README.md
@ -1 +1,11 @@
|
|||||||
**Hello world!!!**
|
## Overview
|
||||||
|
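This block (`block.py`) prepares the raw feature values for the model. Numeric fields are coerced with `pandas.to_numeric` (values that cannot be parsed become `null`), string fields are converted to lower-case text (a missing string value becomes the literal `"none"`), and the cleansed values are returned as a flat output dictionary.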
|
||||||
|
|
||||||
|
## Key Inputs & Outputs
|
||||||
|
- **Request**: Refer to `request_schema.json` for detailed input fields and validation rules.
|
||||||
|
- **Response**: Refer to `response_schema.json` for the returned structure and data types.
|
||||||
|
|
||||||
|
## Implementation Details
|
||||||
|
- All core logic resides in `block.py` within the `__main__` function.
|
||||||
|
- Example usage and validation are demonstrated in `test_block.py`.
|
||||||
|
|
||||||
|
|||||||
101
block.py
101
block.py
@ -1,21 +1,86 @@
|
|||||||
"""Fraud V1 pre-processing block.

Coerces the raw model features into numbers or lower-cased strings and
returns them as a plain dictionary.
"""
import json
import logging
import math

import pandas as pd

# ``__main__`` starts with an underscore, so it must be listed here to be
# reachable through ``from block import *``.
__all__ = ["__main__"]

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Features treated as text. Every other feature is coerced to a number; the
# int/float distinction lives only in the signature annotations because both
# kinds go through the same numeric coercion.
_STRING_FIELDS = frozenset({"browser_os", "ip_address_risk_level"})


def __main__(
    user_age: int,
    persona_entity_confidence_score: float,
    persona_selfie_similarity_score_right: float,
    persona_selfie_similarity_score_left: float,
    persona_hesitation_percentage: float,
    persona_hesitation_count: float,
    device_id_age_max: int,
    selfie_consistency_score_avg: float,
    device_consistency: int,
    selfie_consistency_score: float,
    global_fs_ls: int,
    inquiry_frequency: int,
    confidence_score_min: float,
    contract_date_fs_sub: int,
    browser_os: str,
    user_city_ip_match: int,
    device_id_age_avg: float,
    persona_distraction_events: float,
    sub_fs_ls: int,
    device_id_age_min: int,
    confidence_score_max: float,
    persona_phone_risk_score: float,
    ip_address_risk_level: str,
    login_frequency: float,
    suspect_score: int,
    confidence_score: float,
    name_consistency: int,
    ip_location_consistency: int,
) -> dict:
    """Cleanse the raw feature values and return them as a dictionary.

    Each argument is one feature value. Numeric features are passed through
    ``pandas.to_numeric(..., errors="coerce")``, so a missing or unparseable
    value comes back as ``None``. The two string features (``browser_os`` and
    ``ip_address_risk_level``) are converted with ``str(value).lower()``.

    NOTE(review): because of the ``str()`` conversion a missing string
    feature is returned as the literal text ``"none"`` rather than ``None``.
    ``test_block.py`` asserts exactly that, so it is kept as-is; confirm it
    is what the downstream model expects.

    Returns:
        A dictionary with one entry per argument, keyed by the argument name
        and in the same order as the signature.
    """
    input_data = {
        "user_age": user_age,
        "persona_entity_confidence_score": persona_entity_confidence_score,
        "persona_selfie_similarity_score_right": persona_selfie_similarity_score_right,
        "persona_selfie_similarity_score_left": persona_selfie_similarity_score_left,
        "persona_hesitation_percentage": persona_hesitation_percentage,
        "persona_hesitation_count": persona_hesitation_count,
        "device_id_age_max": device_id_age_max,
        "selfie_consistency_score_avg": selfie_consistency_score_avg,
        "device_consistency": device_consistency,
        "selfie_consistency_score": selfie_consistency_score,
        "global_fs_ls": global_fs_ls,
        "inquiry_frequency": inquiry_frequency,
        "confidence_score_min": confidence_score_min,
        "contract_date_fs_sub": contract_date_fs_sub,
        "browser_os": browser_os,
        "user_city_ip_match": user_city_ip_match,
        "device_id_age_avg": device_id_age_avg,
        "persona_distraction_events": persona_distraction_events,
        "sub_fs_ls": sub_fs_ls,
        "device_id_age_min": device_id_age_min,
        "confidence_score_max": confidence_score_max,
        "persona_phone_risk_score": persona_phone_risk_score,
        "ip_address_risk_level": ip_address_risk_level,
        "login_frequency": login_frequency,
        "suspect_score": suspect_score,
        "confidence_score": confidence_score,
        "name_consistency": name_consistency,
        "ip_location_consistency": ip_location_consistency,
    }

    # One-row frame: every feature becomes a column holding a single value.
    df = pd.DataFrame(input_data, index=[0])

    for column in input_data:
        if column in _STRING_FIELDS:
            # Explicit str() keeps the result independent of how a given
            # pandas version treats missing values in ``astype(str)``.
            df[column] = df[column].map(lambda value: str(value).lower())
        else:
            df[column] = pd.to_numeric(df[column], errors="coerce")

    # Replace NaN with None so missing numbers are returned as nulls.
    row = df.iloc[0]
    output_data = row.where(pd.notnull(row), None).to_dict()

    logger.info("Fraud V1 Pre processed data: %s", output_data)

    return output_data
|
||||||
|
|
||||||
|
|||||||
@ -1 +1,119 @@
|
|||||||
{}
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"user_age": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Age of the user at the contract date, based on birthdate and contract date"
|
||||||
|
},
|
||||||
|
"persona_entity_confidence_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Based on confidence reasons assign a score between 0 and 100"
|
||||||
|
},
|
||||||
|
"persona_selfie_similarity_score_right": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Similarity score from the right side selfie"
|
||||||
|
},
|
||||||
|
"persona_selfie_similarity_score_left": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Similarity score from the left side selfie"
|
||||||
|
},
|
||||||
|
"persona_hesitation_percentage": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Percentage of time in the flow where the customer did not enter inputs"
|
||||||
|
},
|
||||||
|
"persona_hesitation_count": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Persona hesitation count"
|
||||||
|
},
|
||||||
|
"device_id_age_max": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "This calculates the maximum device age for a user and loan, similar to min and avg logic but for the max value."
|
||||||
|
},
|
||||||
|
"selfie_consistency_score_avg": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Average selfie consistency score for the user's persona activity"
|
||||||
|
},
|
||||||
|
"device_consistency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Number of distinct devices associated with a user and loan"
|
||||||
|
},
|
||||||
|
"selfie_consistency_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Average similarity score between left and right selfie"
|
||||||
|
},
|
||||||
|
"global_fs_ls": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Days between the first and last global appearance of the device"
|
||||||
|
},
|
||||||
|
"inquiry_frequency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Number of inquiries made by the user regarding the loan"
|
||||||
|
},
|
||||||
|
"confidence_score_min": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "The minimum recorded confidence score for the user and loan during the timeframe."
|
||||||
|
},
|
||||||
|
"contract_date_fs_sub": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Days between the first subscription appearance and contract date"
|
||||||
|
},
|
||||||
|
"browser_os": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Browser OS"
|
||||||
|
},
|
||||||
|
"user_city_ip_match": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Checks if the user's city matches the IP city"
|
||||||
|
},
|
||||||
|
"device_id_age_avg": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "This calculates the rolling average of device_id_age for a user and loan. If no previous rows, the current value is returned."
|
||||||
|
},
|
||||||
|
"persona_distraction_events": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Persona distraction events"
|
||||||
|
},
|
||||||
|
"sub_fs_ls": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Days between the first and last subscription activity"
|
||||||
|
},
|
||||||
|
"device_id_age_min": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "This calculates the minimum device age for a user and loan, falling back to the current device age if no prior values exist."
|
||||||
|
},
|
||||||
|
"confidence_score_max": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "The maximum confidence score recorded for the user and loan combination."
|
||||||
|
},
|
||||||
|
"persona_phone_risk_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Risk associated with the phone number. The risk score ranges from 0 to 100. The higher the risk score, the higher the risk level."
|
||||||
|
},
|
||||||
|
"ip_address_risk_level": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Checks if the IP country code matches the persona country code"
|
||||||
|
},
|
||||||
|
"login_frequency": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "This counts the number of times the user logs in based on the inquiry_updated_at timestamp, providing insights into the user's login behavior throughout the loan process."
|
||||||
|
},
|
||||||
|
"suspect_score": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Suspect score"
|
||||||
|
},
|
||||||
|
"confidence_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Confidence score"
|
||||||
|
},
|
||||||
|
"name_consistency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Checks if the first name in the persona matches the user-provided first name"
|
||||||
|
},
|
||||||
|
"ip_location_consistency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Number of distinct IP locations for a user and loan"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": []
|
||||||
|
}
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
{}
|
pandas==2.2.2
|
||||||
@ -1 +1,118 @@
|
|||||||
{}
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"user_age": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Age of the user at the contract date, based on birthdate and contract date"
|
||||||
|
},
|
||||||
|
"persona_entity_confidence_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Based on confidence reasons assign a score between 0 and 100"
|
||||||
|
},
|
||||||
|
"persona_selfie_similarity_score_right": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Similarity score from the right side selfie"
|
||||||
|
},
|
||||||
|
"persona_selfie_similarity_score_left": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Similarity score from the left side selfie"
|
||||||
|
},
|
||||||
|
"persona_hesitation_percentage": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Percentage of time in the flow where the customer did not enter inputs"
|
||||||
|
},
|
||||||
|
"persona_hesitation_count": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Persona hesitation count"
|
||||||
|
},
|
||||||
|
"device_id_age_max": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "This calculates the maximum device age for a user and loan, similar to min and avg logic but for the max value."
|
||||||
|
},
|
||||||
|
"selfie_consistency_score_avg": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Average selfie consistency score for the user's persona activity"
|
||||||
|
},
|
||||||
|
"device_consistency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Number of distinct devices associated with a user and loan"
|
||||||
|
},
|
||||||
|
"selfie_consistency_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Average similarity score between left and right selfie"
|
||||||
|
},
|
||||||
|
"global_fs_ls": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Days between the first and last global appearance of the device"
|
||||||
|
},
|
||||||
|
"inquiry_frequency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Number of inquiries made by the user regarding the loan"
|
||||||
|
},
|
||||||
|
"confidence_score_min": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "The minimum recorded confidence score for the user and loan during the timeframe."
|
||||||
|
},
|
||||||
|
"contract_date_fs_sub": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Days between the first subscription appearance and contract date"
|
||||||
|
},
|
||||||
|
"browser_os": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Browser OS"
|
||||||
|
},
|
||||||
|
"user_city_ip_match": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Checks if the user's city matches the IP city"
|
||||||
|
},
|
||||||
|
"device_id_age_avg": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "This calculates the rolling average of device_id_age for a user and loan. If no previous rows, the current value is returned."
|
||||||
|
},
|
||||||
|
"persona_distraction_events": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Persona distraction events"
|
||||||
|
},
|
||||||
|
"sub_fs_ls": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Days between the first and last subscription activity"
|
||||||
|
},
|
||||||
|
"device_id_age_min": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "This calculates the minimum device age for a user and loan, falling back to the current device age if no prior values exist."
|
||||||
|
},
|
||||||
|
"confidence_score_max": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "The maximum confidence score recorded for the user and loan combination."
|
||||||
|
},
|
||||||
|
"persona_phone_risk_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Risk associated with the phone number. The risk score ranges from 0 to 100. The higher the risk score, the higher the risk level."
|
||||||
|
},
|
||||||
|
"ip_address_risk_level": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Checks if the IP country code matches the persona country code"
|
||||||
|
},
|
||||||
|
"login_frequency": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "This counts the number of times the user logs in based on the inquiry_updated_at timestamp, providing insights into the user's login behavior throughout the loan process."
|
||||||
|
},
|
||||||
|
"suspect_score": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Suspect score"
|
||||||
|
},
|
||||||
|
"confidence_score": {
|
||||||
|
"type": ["number", "null"],
|
||||||
|
"description": "Confidence score"
|
||||||
|
},
|
||||||
|
"name_consistency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Checks if the first name in the persona matches the user-provided first name"
|
||||||
|
},
|
||||||
|
"ip_location_consistency": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "Number of distinct IP locations for a user and loan"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
24
test_block.py
Normal file
24
test_block.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import unittest
|
||||||
|
import pandas as pd
|
||||||
|
from block import __main__
|
||||||
|
|
||||||
|
class TestBlock(unittest.TestCase):
    """Runs the pre-processing block against one known request."""

    def test_main_success(self):
        """Numeric features pass through; string features come back lower-cased."""
        request = {
            "user_age": 43,
            "persona_entity_confidence_score": None,
            "persona_selfie_similarity_score_right": None,
            "persona_selfie_similarity_score_left": None,
            "persona_hesitation_percentage": None,
            "persona_hesitation_count": None,
            "device_id_age_max": 181.0,
            "selfie_consistency_score_avg": None,
            "device_consistency": 1,
            "selfie_consistency_score": None,
            "global_fs_ls": 146.0,
            "inquiry_frequency": 0,
            "confidence_score_min": 1.0,
            "contract_date_fs_sub": 181.0,
            "browser_os": 'Android',
            "user_city_ip_match": 1.0,
            "device_id_age_avg": 181.0,
            "persona_distraction_events": None,
            "sub_fs_ls": 146.0,
            "device_id_age_min": 181.0,
            "confidence_score_max": 1.0,
            "persona_phone_risk_score": None,
            "ip_address_risk_level": None,
            "login_frequency": 0,
            "suspect_score": 0.0,
            "confidence_score": 1.0,
            "name_consistency": None,
            "ip_location_consistency": 1,
        }
        result = __main__(**request)

        # The expected output equals the input except for the two string
        # features: one is lower-cased, the missing one becomes "none".
        expected_result = {
            **request,
            "browser_os": "android",
            "ip_address_risk_level": "none",
        }

        for field, wanted in expected_result.items():
            note = f"Mismatch for {field}"
            if wanted is None:
                self.assertTrue(pd.isna(result[field]), msg=note)
            elif isinstance(wanted, float):
                self.assertAlmostEqual(result[field], wanted, places=6, msg=note)
            else:
                self.assertEqual(result[field], wanted, msg=note)
|
||||||
|
|
||||||
|
# def test_main_invalid_input(self):
|
||||||
|
# with self.assertRaises(TypeError):
|
||||||
|
# __main__(user_age = '43',persona_entity_confidence_score = None,persona_selfie_similarity_score_right = None,persona_selfie_similarity_score_left = None,persona_hesitation_percentage = None,persona_hesitation_count = None,device_id_age_max = 181.0,selfie_consistency_score_avg = None,device_consistency = 1,selfie_consistency_score = None,global_fs_ls = 146.0,inquiry_frequency = 0,confidence_score_min = 1.0,contract_date_fs_sub = 181.0,browser_os = 'Android',user_city_ip_match = 1.0,device_id_age_avg = 181.0,persona_distraction_events = None,sub_fs_ls = 146.0,device_id_age_min = 181.0,confidence_score_max = 1.0,persona_phone_risk_score = None,ip_address_risk_level = None,login_frequency = 0,suspect_score = 0.0,confidence_score = 1.0,name_consistency = None,ip_location_consistency = 1) # Invalid input type (string)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Loading…
x
Reference in New Issue
Block a user