-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxgboos_modeling_caret.R
More file actions
73 lines (56 loc) · 2.35 KB
/
xgboos_modeling_caret.R
File metadata and controls
73 lines (56 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Setup: reproducibility, dependencies, raw data ----
set.seed(123)
library(DMwR)   # SMOTE() — used by the (currently disabled) resampling step below
library(caret)  # createDataPartition(), trainControl(), train(), confusionMatrix()
library(e1071)  # required backend for some caret models
library(readr)  # read_csv() — previously the script relied on this being attached elsewhere
library(dplyr)  # %>% and select() are used below but were never attached
# Input files are expected in the working directory.
mydata <- read_csv('Data_set.csv')
# NOTE(review): mydata_imp is loaded but never used later in this script — confirm intent.
mydata_imp <- read_csv('Data_imputed.csv')
# Candidate predictors ----
feature_names <- c("BNK_LNIF_CNT", "CPT_LNIF_CNT", "SPART_LNIF_CNT",
                   "ECT_LNIF_CNT", "TOT_LNIF_AMT", "TOT_CLIF_AMT",
                   "BNK_LNIF_AMT", "CPT_LNIF_AMT", "CRDT_OCCR_MDIF",
                   "SPTCT_OCCR_MDIF", "CRDT_CARD_CNT", "CTCD_OCCR_MDIF",
                   "CB_GUIF_CNT", "CB_GUIF_AMT")
# Engineered features considered but not currently used:
new_features <- c("SUSP_TOT_CATE")
# "FIRST_SECOND_RATIO", "ONLY_FIRST", "TOT_LNIF_CNT",
# "TOT_SECOND_CNT", "TOT_SECOND_AMT", "CB_GUIF_AMT_PER_CNT",
# Keep TARGET plus the selected predictors.
# The original script built the variant including new_features and then
# immediately overwrote it (dead assignment); the variant is kept here,
# disabled, for easy toggling:
# data_new <- mydata %>% select(TARGET, one_of(feature_names),
#                               one_of(new_features))
data_new <- mydata %>% select(TARGET, one_of(feature_names))
# Split train and test data ----
# Stratify the 70/30 split on the outcome so both partitions keep the class
# balance. The original called createDataPartition(1:nrow(data_new), ...),
# which partitions on row indices and provides no stratification at all.
train_ind <- createDataPartition(data_new$TARGET, p = 0.7, list = FALSE)
train <- data_new[train_ind, ]
test <- data_new[-train_ind, ]
# SMOTE resampling of the train set (disabled: class imbalance is instead
# handled by sampling = 'up' inside trainControl below)
# train$TARGET <- as.factor(train$TARGET)
# train <- SMOTE(TARGET ~ ., train, perc.over = 100, perc.under = 200)
# train$TARGET <- as.numeric(train$TARGET)
# Model fitting configuration (xgb) ----
# 10-fold CV with minority-class upsampling inside each fold; keep all
# resampling results but drop the training data from the fitted object.
xgb_control <- trainControl(
  method        = 'cv',
  number        = 10,
  sampling      = 'up',
  verboseIter   = TRUE,
  returnData    = FALSE,
  returnResamp  = 'all',
  allowParallel = TRUE
)
# A single fixed hyperparameter point (no grid search): shallow trees,
# moderate learning rate, no column/row subsampling.
xgb_grid <- expand.grid(
  nrounds          = 100,
  max_depth        = 3,
  eta              = 0.1,
  gamma            = 1,
  colsample_bytree = 1,
  min_child_weight = 1,
  subsample        = 1
)
# Fit, predict, evaluate ----
# caret needs a factor outcome for classification. Convert the TEST outcome
# too, with the same level ordering: the original left test$TARGET numeric,
# so confusionMatrix() compared a factor against a numeric vector.
train$TARGET <- as.factor(train$TARGET)
test$TARGET <- factor(test$TARGET, levels = levels(train$TARGET))
xgb_fit <- train(TARGET ~ .,
                 data = train,
                 method = 'xgbTree',
                 trControl = xgb_control,
                 tuneGrid = xgb_grid
)
xgb_pred <- predict(xgb_fit, newdata = test)
xgb_pred %>% confusionMatrix(test$TARGET)
# F1-score ----
# Derive precision/recall from the confusion matrix rather than the
# hard-coded counts (1075/217/5922) in the original, which only matched
# one particular run of the script.
# NOTE(review): caret treats the FIRST factor level as the positive class by
# default; the original hand computation appears to use the minority/event
# class — set `positive =` explicitly if that is not the first level.
cm <- confusionMatrix(xgb_pred, test$TARGET)
precision <- unname(cm$byClass["Precision"])
recall <- unname(cm$byClass["Recall"])
(F1 <- (2 * precision * recall) / (precision + recall))
# Feature Importance ----
plot(varImp(object = xgb_fit), main = 'XGB - Variable Importance')