This repository has been archived by the owner on Jan 27, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfraud_detection.py
115 lines (79 loc) · 4.09 KB
/
fraud_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import sys
import time
import logging
import argparse
import pathlib
import pandas as pd
import matplotlib.pyplot as plt
import properties
from src.preprocessing.Preprocessor import Preprocessor
from src.modelling.Classifier import Classifier
def run():
    """Clear the terminal and log that the program has started.

    The clear command depends on the platform: ``cls`` on Windows
    (``os.name == 'nt'``), ``clear`` everywhere else.
    """
    # NOTE(review): if this runs before logging.basicConfig() is called, the
    # logging module configures the root logger with its defaults on this call.
    clear_command = "clear"
    if os.name == 'nt':
        clear_command = "cls"
    os.system(clear_command)
    logging.info("Program started")
if __name__ == "__main__":
    # Script entry point: parse the command line, optionally preprocess the
    # default dataset, then (unless only preprocessing was requested) load the
    # seed-specific CSV and run the classifier on it.

    # Start from a clean terminal ("cls" on Windows, "clear" elsewhere).
    os.system("cls" if os.name == 'nt' else "clear")

    # ------------------------------------------------------------------ CLI
    parser = argparse.ArgumentParser(description=properties.DESCRIPTION_MESSAGE)
    required_args = parser.add_argument_group('required arguments')
    parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
    required_args.add_argument("-s", "--seed", help="Seed value for randomness", type=int, required=True)
    parser.add_argument(
        "--split_percentage",
        help="Validate the classifier with a percentage split (requires --test)",
        action="store_true",
    )
    parser.add_argument("--test", help="Test percentage", type=float)
    parser.add_argument(
        "--cross_validation",
        help="Validate the classifier with cross validation (requires --fold)",
        action="store_true",
    )
    # Listed with the required arguments, but only enforced below when
    # --cross_validation is selected.
    required_args.add_argument("--fold", help="Fold count (required with --cross_validation)", type=int)
    parser.add_argument("--algorithm", help="Classification algorithm to use", type=str)
    parser.add_argument("--bypass_preprocess", help="Skip the preprocessing step", action="store_true")
    parser.add_argument(
        "--apply_only_preprocess",
        help="Run the preprocessing step only and skip classification",
        action="store_true",
    )
    args = parser.parse_args()

    script_path = pathlib.Path(__file__).parent.absolute()

    # ----------------------------------------------------------- validation
    # parser.error() prints the usage text plus the message and exits with
    # status 2, which is friendlier for CLI misuse than a raw traceback.
    if not (args.cross_validation or args.split_percentage):
        parser.error("Cross validation or split percentage should be selected")
    if args.cross_validation and not args.fold:
        parser.error("Fold is necessary if Cross validation selected")
    if args.split_percentage and not args.test:
        parser.error("Test is necessary if Split percentage selected")

    # -------------------------------------------------------------- logging
    if args.verbose:
        logging.basicConfig(format='%(levelname)s:\t%(message)s', level=logging.DEBUG)
        logging.info("VERBOSITY ON")
    else:
        logging.basicConfig(format='%(levelname)s :\t%(message)s', level=logging.WARNING)

    seed_val = args.seed
    logging.info(f"Seed value is {seed_val}")
    logging.info(f"SCRIPT PATH ==> {script_path}")
    logging.info(f"dataset ==> {properties.DEFAULT_DATASET}")

    # NOTE: per-method elapsed-time/status bookkeeping for Preprocessor was
    # sketched here as commented-out code; it was never wired up.

    # -------------------------------------------------------- preprocessing
    if not args.bypass_preprocess:
        preprocessor = Preprocessor(dataset_path=properties.DEFAULT_DATASET)
        preprocessor.preprocess(
            class_label=["Class", 0],  # 0 for valid, 1 for fraudulent
            group_by=['Cardholder Last Name', 'Cardholder First Initial'],
            random_fraction_per_group=0.5,
            seed_val=seed_val
        )
    else:
        logging.debug("Preprocesssing bypassed!")

    # ------------------------------------------------------- classification
    if not args.apply_only_preprocess:
        # The CSV is looked up relative to the current working directory.
        # NOTE(review): presumably written by the preprocessing step above for
        # this seed -- confirm against Preprocessor.
        try:
            data = pd.read_csv(f"{seed_val}_generated.csv")
        except FileNotFoundError as e:
            # Keep the original exception as the cause so the missing path
            # stays visible in the traceback.
            raise FileNotFoundError(
                f"Couldn't find the dataset for the seed value ({seed_val})"
            ) from e

        classifier = Classifier(
            data,
            cross_validation=args.cross_validation,
            k=args.fold if args.fold else 10,  # falls back to 10 when --fold is not given
            test_ratio=args.test,
            algorithm=args.algorithm,
            seed_val=seed_val,
        )
        logging.debug(f"Selected algorithm: {args.algorithm} via {'cross validation' if args.cross_validation else 'percentage split'}")
        classifier.analyze()
    else:
        logging.debug("apply_only_preprocess active")

    # NOTE: debug logging of per-stage elapsed times (initialization, class
    # labeling, grouping, random selection, fraud transaction creation, total)
    # was sketched here as commented-out code; the timestamps it referenced
    # are not defined anywhere in this script.