-
Notifications
You must be signed in to change notification settings - Fork 40
/
anomaly_transaction_detection.py
121 lines (92 loc) · 4.19 KB
/
anomaly_transaction_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import pandas as pd
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report
# Load the transaction dataset; the path is resolved relative to the current
# working directory.
data = pd.read_csv("transaction_anomalies_dataset.csv")

# Optional extra checks while exploring:
#   print(data.head())          # first rows
#   print(data.isnull().sum())  # missing values per column

# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called directly; wrapping it in print() would append a stray "None".
data.info()
# Summary statistics as reported by DataFrame.describe().
print(data.describe())
# --- Exploratory plots (currently disabled) ---
# Every figure below is wrapped in a bare triple-quoted string, so Python only
# evaluates an unused string literal and nothing is drawn. Remove the quotes
# around a figure to re-enable it.

# Histogram of Transaction_Amount using 20 bins.
'''fig_amount = px.histogram(data, x='Transaction_Amount',
nbins=20,
title='Distribution of Transaction Amount')
fig_amount.show()'''
# Box plot of Transaction_Amount grouped by Account_Type.
'''fig_box_amount = px.box(data,
x='Account_Type',
y='Transaction_Amount',
title='Transaction Amount by Account Type')
fig_box_amount.show()'''
# Scatter of Average_Transaction_Amount against Age, coloured by Account_Type,
# with an OLS trendline.
# NOTE(review): trendline='ols' presumably needs statsmodels installed - confirm.
'''fig_scatter_avg_amount_age = px.scatter(data, x='Age',
y='Average_Transaction_Amount',
color='Account_Type',
title='Average Transaction Amount vs. Age',
trendline='ols')
fig_scatter_avg_amount_age.show()'''
# Bar chart of transactions per Day_of_Week.
'''fig_day_of_week = px.bar(data, x='Day_of_Week',
title='Count of Transactions by Day of the Week')
fig_day_of_week.show()'''
# Heatmap of the pairwise column correlations.
# NOTE(review): if re-enabled on a recent pandas, data.corr() may need
# numeric_only=True when the frame holds non-numeric columns - confirm.
'''correlation_matrix = data.corr()
fig_corr_heatmap = px.imshow(correlation_matrix,
title='Correlation Heatmap')
fig_corr_heatmap.show()'''
# --- Rule-based labelling of unusually large transaction amounts ---
# A transaction is flagged when its amount is more than two standard
# deviations above the mean amount of the whole dataset.
mean_amount = data['Transaction_Amount'].mean()
std_amount = data['Transaction_Amount'].std()
anomaly_threshold = mean_amount + 2 * std_amount

# Boolean flag column; it is used further down as the reference label.
data['Is_Anomaly'] = data['Transaction_Amount'] > anomaly_threshold

# Scatter plot of amount vs. average amount, coloured by the anomaly flag.
fig_anomalies = px.scatter(
    data,
    x='Transaction_Amount',
    y='Average_Transaction_Amount',
    color='Is_Anomaly',
    title='Anomalies in Transaction Amount',
)
# Enlarge the markers of every marker-mode trace. The selector must not also
# require marker_size=1: the traces built above do not carry that value, so
# such a selector matches nothing and the resize is silently skipped.
fig_anomalies.update_traces(
    marker=dict(size=12),
    selector=dict(mode='markers'),
)
fig_anomalies.show()

# Share of rows flagged as anomalous (flagged rows / all rows).
num_anomalies = data['Is_Anomaly'].sum()
total_instances = data.shape[0]
anomaly_ratio = num_anomalies / total_instances
print(anomaly_ratio)
# --- Isolation Forest: train on the numeric features and evaluate ---
# Columns fed to the model; the same list is used for the interactive check.
relevant_features = [
    'Transaction_Amount',
    'Average_Transaction_Amount',
    'Frequency_of_Transactions',
]

# Features (X) and the rule-based reference label (y).
X = data[relevant_features]
y = data['Is_Anomaly']

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The forest is fitted on the features only (y_train is not used);
# contamination is the assumed fraction of anomalous rows.
model = IsolationForest(contamination=0.02, random_state=42)
model.fit(X_train)

# predict() yields -1 for outliers and 1 for inliers; map that to
# 1 (anomaly) / 0 (normal) so it lines up with the reference label.
y_pred = model.predict(X_test)
y_pred_binary = (y_pred == -1).astype(int)

# Compare integer labels on both sides and pin labels=[0, 1] so the two
# target names always line up, even when the test split happens to contain a
# single class (which would otherwise raise). zero_division=0 keeps the
# report quiet when a class has no predicted or true samples.
report = classification_report(
    y_test.astype(int),
    y_pred_binary,
    labels=[0, 1],
    target_names=['Normal', 'Anomaly'],
    zero_division=0,
)
print(report)
# --- Interactive check of a single transaction ---
# `relevant_features` (defined where the model is trained) is reused as-is so
# the prompts and the DataFrame columns match what the model was fitted on.
user_inputs = []
for feature in relevant_features:
    # Keep asking until the entry parses as a number instead of crashing
    # with ValueError on a typo.
    while True:
        raw_value = input(f"Enter the value for '{feature}': ")
        try:
            value = float(raw_value)
        except ValueError:
            print(f"'{raw_value}' is not a valid number, please try again.")
            continue
        user_inputs.append(value)
        break

# Single-row frame with the training column names.
user_df = pd.DataFrame([user_inputs], columns=relevant_features)

# predict() returns a one-element array for the single row; index it
# explicitly rather than relying on the truth value of an array comparison.
user_anomaly_pred = model.predict(user_df)
# Convert the prediction to a binary value (0: normal, 1: anomaly).
user_anomaly_pred_binary = 1 if user_anomaly_pred[0] == -1 else 0

if user_anomaly_pred_binary == 1:
    print("Anomaly detected: This transaction is flagged as an anomaly.")
else:
    print("No anomaly detected: This transaction is normal.")