Calculate performance metrics for binary classification model evaluation.
run_performance(model, actual = NULL)
A model_df. The results of the predicted models, as created by run_predict().
factor. The actual values of the target variable used to evaluate the model. Only a factor with two classes (binary class) is supported.
model_df. The results of the predicted models. A model_df is composed of a tbl_df and contains the following variables:
step : character. The current stage in the model fit process. The result of calling run_performance() is returned as "3.Performanced".
model_id : character. Type of fitted model.
target : character. Name of target variable.
positive : character. Level of positive class of binary classification.
fitted_model : list. Fitted model object.
predicted : list. Predicted values for each individual model. Each value is an object of class predict_class.
performance : list. Calculated metrics for each individual model. Each value is a numeric vector.
The calculated performance metrics are as follows:
ZeroOneLoss : Normalized Zero-One Loss (Classification Error Loss).
Accuracy : Accuracy.
Precision : Precision.
Recall : Recall.
Sensitivity : Sensitivity.
Specificity : Specificity.
F1_Score : F1 Score.
Fbeta_Score : F-Beta Score.
LogLoss : Log loss / Cross-Entropy Loss.
AUC : Area Under the Receiver Operating Characteristic Curve (ROC AUC).
Gini : Gini Coefficient.
PRAUC : Area Under the Precision-Recall Curve (PR AUC).
LiftAUC : Area Under the Lift Chart.
GainAUC : Area Under the Gain Chart.
KS_Stat : Kolmogorov-Smirnov Statistic.
run_performance() calculates the performance metrics in parallel. However, parallel processing is not supported on the MS-Windows operating system or in the RStudio environment.
# \donttest{
library(dplyr)
# Divide the train data set and the test data set.
sb <- rpart::kyphosis %>%
split_by(Kyphosis)
# Extract the train data set from original data set.
train <- sb %>%
extract_set(set = "train")
# Extract the test data set from original data set.
test <- sb %>%
extract_set(set = "test")
# Sampling for an unbalanced data set using SMOTE (Synthetic Minority Over-sampling Technique).
train <- sb %>%
sampling_target(seed = 1234L, method = "ubSMOTE")
# Cleaning the set.
train <- train %>%
cleanse
#> ── Checking unique value ─────────────────────────── unique value is one ──
#> No variables that unique value is one.
#>
#> ── Checking unique rate ─────────────────────────────── high unique rate ──
#> No variables that high unique rate.
#>
#> ── Checking character variables ─────────────────────── categorical data ──
#> No character variables.
#>
#>
# Run the model fitting.
result <- run_models(.data = train, target = "Kyphosis", positive = "present")
result
#> # A tibble: 7 × 7
#> step model_id target is_factor positive negative fitted_model
#> <chr> <chr> <chr> <lgl> <chr> <chr> <list>
#> 1 1.Fitted logistic Kyphosis TRUE present absent <glm>
#> 2 1.Fitted rpart Kyphosis TRUE present absent <rpart>
#> 3 1.Fitted ctree Kyphosis TRUE present absent <BinaryTr>
#> 4 1.Fitted randomForest Kyphosis TRUE present absent <rndmFrs.>
#> 5 1.Fitted ranger Kyphosis TRUE present absent <ranger>
#> 6 1.Fitted xgboost Kyphosis TRUE present absent <xgb.Bstr>
#> 7 1.Fitted lasso Kyphosis TRUE present absent <lognet>
# Predict the model. (Case 1)
pred <- run_predict(result, test)
pred
#> # A tibble: 7 × 8
#> step model_id target is_factor positive negative fitted_model predicted
#> <chr> <chr> <chr> <lgl> <chr> <chr> <list> <list>
#> 1 2.Predict… logistic Kypho… TRUE present absent <glm> <prdct_cl>
#> 2 2.Predict… rpart Kypho… TRUE present absent <rpart> <prdct_cl>
#> 3 2.Predict… ctree Kypho… TRUE present absent <BinaryTr> <prdct_cl>
#> 4 2.Predict… randomF… Kypho… TRUE present absent <rndmFrs.> <prdct_cl>
#> 5 2.Predict… ranger Kypho… TRUE present absent <ranger> <prdct_cl>
#> 6 2.Predict… xgboost Kypho… TRUE present absent <xgb.Bstr> <prdct_cl>
#> 7 2.Predict… lasso Kypho… TRUE present absent <lognet> <prdct_cl>
# Calculate performance metrics. (Case 1)
perf <- run_performance(pred)
#> Error in purrr::map(., ~future::value(.x)): ℹ In index: 1.
#> Caused by error:
#> ! object 'pred' not found
perf
#> Error in eval(expr, envir, enclos): object 'perf' not found
perf$performance
#> Error in eval(expr, envir, enclos): object 'perf' not found
# Predict the model. (Case 2)
pred <- run_predict(result, test[, -1])
pred
#> # A tibble: 7 × 8
#> step model_id target is_factor positive negative fitted_model predicted
#> <chr> <chr> <chr> <lgl> <chr> <chr> <list> <list>
#> 1 2.Predict… logistic Kypho… TRUE present absent <glm> <prdct_cl>
#> 2 2.Predict… rpart Kypho… TRUE present absent <rpart> <prdct_cl>
#> 3 2.Predict… ctree Kypho… TRUE present absent <BinaryTr> <prdct_cl>
#> 4 2.Predict… randomF… Kypho… TRUE present absent <rndmFrs.> <prdct_cl>
#> 5 2.Predict… ranger Kypho… TRUE present absent <ranger> <prdct_cl>
#> 6 2.Predict… xgboost Kypho… TRUE present absent <xgb.Bstr> <prdct_cl>
#> 7 2.Predict… lasso Kypho… TRUE present absent <lognet> <prdct_cl>
# Calculate performance metrics. (Case 2)
perf <- run_performance(pred, pull(test[, 1]))
#> Error in purrr::map(., ~future::value(.x)): ℹ In index: 1.
#> Caused by error:
#> ! object 'pred' not found
perf
#> Error in eval(expr, envir, enclos): object 'perf' not found
perf$performance
#> Error in eval(expr, envir, enclos): object 'perf' not found
# Convert to a matrix to compare performance.
sapply(perf$performance, "c")
#> Error in eval(expr, envir, enclos): object 'perf' not found
# }