|
26 | 26 | #' @importFrom ggplot2 geom_boxplot geom_histogram ggplot facet_wrap labs theme_bw labs aes |
27 | 27 | #' @importFrom graphics hist pairs panel.smooth par rect |
28 | 28 | #' @importFrom gt gt |
| 29 | +#' @importFrom htmltools h2 |
| 30 | +#' @importFrom htmlwidgets prependContent |
29 | 31 | #' @importFrom ipred bagging |
30 | 32 | #' @importFrom MachineShop fit |
31 | 33 | #' @importFrom magrittr %>% |
|
35 | 37 | #' @importFrom randomForest randomForest |
36 | 38 | #' @importFrom ranger ranger |
37 | 39 | #' @importFrom reactable reactable |
38 | | -#' @importFrom reactablefmtr add_title |
39 | 40 | #' @importFrom scales percent |
40 | 41 | #' @importFrom stats complete.cases cor lm predict reorder sd |
41 | 42 | #' @importFrom tidyr gather last_col pivot_longer |
|
45 | 46 | #' @returns a full analysis, including data visualizations, statistical summaries, and a full report on the results of 35 models on the data |
46 | 47 | #' @export Classification |
47 | 48 |
|
| 49 | + |
48 | 50 | #### Function definition #### |
49 | 51 | Classification <- function(data, colnum, numresamples, predict_on_new_data = c("Y", "N"), remove_VIF_above, scale_all_numeric_predictors_in_data, |
50 | 52 | how_to_handle_strings = c(0("No strings"), 1("Strings as factors")), set_seed = c("Y", "N"), save_all_trained_models = c("Y", "N"), |
@@ -82,23 +84,35 @@ for (i in 1:ncol(df)) { |
82 | 84 | } |
83 | 85 |
|
84 | 86 |
|
85 | | -VIF <- reactable::reactable(as.data.frame(VIF), |
| 87 | +VIF <- reactable::reactable(data.frame(VIF), |
86 | 88 | searchable = TRUE, pagination = FALSE, wrap = TRUE, rownames = TRUE, fullWidth = TRUE, filterable = TRUE, bordered = TRUE, |
87 | | - striped = TRUE, highlight = TRUE, resizable = TRUE |
88 | | -)%>% |
89 | | - reactablefmtr::add_title("Variance Inflation Factor") |
| 89 | + striped = TRUE, highlight = TRUE, resizable = TRUE, |
| 90 | +) |
| 91 | + |
| 92 | +htmltools::div(class = "table", |
| 93 | + htmltools::div(class = "title", "VIF") |
| 94 | +) |
| 95 | + |
| 96 | +VIF_report <- htmlwidgets::prependContent(VIF, htmltools::h2(class = "title", "VIF")) |
| 97 | + |
90 | 98 |
|
91 | 99 | head_df <- head(df) %>% dplyr::mutate_if(is.numeric, round, digits = 4) |
92 | 100 |
|
93 | 101 | head_df <- reactable::reactable(head_df, |
94 | 102 | searchable = TRUE, pagination = FALSE, wrap = TRUE, rownames = TRUE, fullWidth = TRUE, filterable = TRUE, bordered = TRUE, |
95 | 103 | striped = TRUE, highlight = TRUE, resizable = TRUE |
96 | | -)%>% |
97 | | - reactablefmtr::add_title("Head of the data frame") |
| 104 | +) |
| 105 | + |
| 106 | +htmltools::div(class = "table", |
| 107 | + htmltools::div(class = "title", "Head of the data frame") |
| 108 | +) |
| 109 | + |
| 110 | +head_df <- htmlwidgets::prependContent(head_df, htmltools::h2(class = "title", "Head of the data frame")) |
98 | 111 |
|
99 | 112 | data_summary <- summary(df) |
100 | 113 |
|
101 | 114 | #### Save all plots #### |
| 115 | + |
102 | 116 | if(save_all_plots == "Y"){ |
103 | 117 | width = as.numeric(readline("Width of the graphics: ")) |
104 | 118 | height = as.numeric(readline("Height of the graphics: ")) |
@@ -172,8 +186,13 @@ corrplot::corrplot(M1, method = "circle", title = title, mar = c(0, 0, 1, 0)) # |
172 | 186 | correlation_marix <- reactable::reactable(round(cor(df1), 4), |
173 | 187 | searchable = TRUE, pagination = FALSE, wrap = TRUE, rownames = TRUE, fullWidth = TRUE, filterable = TRUE, bordered = TRUE, |
174 | 188 | striped = TRUE, highlight = TRUE, resizable = TRUE |
175 | | -)%>% |
176 | | - reactablefmtr::add_title("Correlation of the data") |
| 189 | +) |
| 190 | +htmltools::div(class = "table", |
| 191 | + htmltools::div(class = "title", "Correlation matrix") |
| 192 | +) |
| 193 | + |
| 194 | +correlation_matrix <- htmlwidgets::prependContent(correlation_marix, htmltools::h2(class = "title", "Correlation matrix")) |
| 195 | + |
177 | 196 |
|
178 | 197 | #### Boxplots of the numeric data #### |
179 | 198 | boxplots <- df1 %>% |
@@ -785,9 +804,6 @@ train_ratio_df <- data.frame() |
785 | 804 | test_ratio_df <- data.frame() |
786 | 805 | validation_ratio_df <- data.frame() |
787 | 806 | stratified_sampling_report <- 0 |
788 | | - |
789 | | -Percentage <- 0 |
790 | | -Variable <- 0 |
791 | 807 | accuracy_plot <- 0 |
792 | 808 | total_plot <- 0 |
793 | 809 | holdout_vs_train_plot <- 0 |
@@ -818,8 +834,13 @@ if(stratified_random_column > 0){ |
818 | 834 |
|
819 | 835 | stratified_sampling_report <- reactable::reactable(df1, searchable = TRUE, pagination = FALSE, wrap = TRUE, rownames = TRUE, fullWidth = TRUE, filterable = TRUE, bordered = TRUE, |
820 | 836 | striped = TRUE, highlight = TRUE, resizable = TRUE |
821 | | - )%>% |
822 | | - reactablefmtr::add_title("Stratified Random Sampling Report") |
| 837 | + ) |
| 838 | + |
| 839 | + htmltools::div(class = "table", |
| 840 | + htmltools::div(class = "title", "stratified_sampling_report ") |
| 841 | + ) |
| 842 | + |
| 843 | + stratified_sampling_report <- htmlwidgets::prependContent(stratified_sampling_report , htmltools::h2(class = "title", "Stratified sampling report")) |
823 | 844 | } |
824 | 845 |
|
825 | 846 | #### Random resampling starts here #### |
@@ -1889,12 +1910,14 @@ for (i in 1:numresamples) { |
1889 | 1910 | head_ensemble <- reactable::reactable(head(ensemble1), |
1890 | 1911 | searchable = TRUE, pagination = FALSE, wrap = TRUE, rownames = TRUE, fullWidth = TRUE, filterable = TRUE, bordered = TRUE, |
1891 | 1912 | striped = TRUE, highlight = TRUE, resizable = TRUE |
1892 | | - )%>% |
1893 | | - reactablefmtr::add_title("Head of the ensemble") |
| 1913 | + ) |
| 1914 | + |
| 1915 | + htmltools::div(class = "table", |
| 1916 | + htmltools::div(class = "title", "head_ensemble") |
| 1917 | + ) |
| 1918 | + |
| 1919 | + head_ensemble <- htmlwidgets::prependContent(head_ensemble, htmltools::h2(class = "title", "Head of the ensemble")) |
1894 | 1920 |
|
1895 | | - # if(save_all_plots == "Y"){ |
1896 | | - # reactablefmtr::save_reactable_test(head_ensemble, "Head_of_the_ensemble.html") |
1897 | | - # } |
1898 | 1921 |
|
1899 | 1922 | ensemble_index <- sample(c(1:3), nrow(ensemble1), replace = TRUE, prob = c(train_amount, test_amount, validation_amount)) |
1900 | 1923 | ensemble_train <- ensemble1[ensemble_index == 1, ] |
@@ -2801,12 +2824,13 @@ Results <- Results %>% dplyr::arrange(dplyr::desc(Mean_Holdout_Accuracy)) |
2801 | 2824 | Final_results <- reactable::reactable(Results, |
2802 | 2825 | searchable = TRUE, pagination = FALSE, wrap = TRUE, fullWidth = TRUE, filterable = TRUE, bordered = TRUE, |
2803 | 2826 | striped = TRUE, highlight = TRUE, rownames = TRUE, resizable = TRUE |
2804 | | -) %>% |
2805 | | - reactablefmtr::add_title("Classification analysis, accuracy, duration, holdout_vs_train, sum of diagonals") |
| 2827 | +) |
2806 | 2828 |
|
2807 | | -# if(save_all_plots == "Y"){ |
2808 | | -# reactablefmtr::save_reactable_test(Final_results, "Final_results.html") |
2809 | | -# } |
| 2829 | +htmltools::div(class = "table", |
| 2830 | + htmltools::div(class = "title", "Final_results") |
| 2831 | +) |
| 2832 | + |
| 2833 | +summary_report <- htmlwidgets::prependContent(Final_results, htmltools::h2(class = "title", "Summary report")) |
2810 | 2834 |
|
2811 | 2835 | summary_tables <- list( |
2812 | 2836 | "Bagging" = bagging_table_total, "Bagged Random Forest" = bag_rf_table_total, "C50" = C50_table_total, |
@@ -3995,7 +4019,11 @@ if (predict_on_new_data == "Y") { |
3995 | 4019 | ) |
3996 | 4020 |
|
3997 | 4021 | if(save_all_plots == "Y"){ |
3998 | | - reactablefmtr::save_reactable_test(New_Data_Results, "New_Data_Results.html") |
| 4022 | + htmltools::div(class = "table", |
| 4023 | + htmltools::div(class = "title", "New_Data_Results") |
| 4024 | + ) |
| 4025 | + |
| 4026 | + new_data_results <- htmlwidgets::prependContent(New_Data_Results, htmltools::h2(class = "title", "New data results")) |
3999 | 4027 | } |
4000 | 4028 |
|
4001 | 4029 | if (save_all_trained_models == "Y") { |
@@ -4142,19 +4170,19 @@ if (save_all_trained_models == "Y") { |
4142 | 4170 |
|
4143 | 4171 | #### Return list of all reports #### |
4144 | 4172 | return(list( |
4145 | | - 'Final results' = Final_results, 'Barchart values' = barchart, 'Barchart percent' = barchart_percentage, "Accuracy Barchart" = accuracy_barchart, "Holdout vs train barchart" = holdout_vs_train_barchart, |
4146 | | - 'True positive rate fixed scales' = true_positive_rate_fixed_scales, 'True positive rate free scales' = true_positive_rate_free_scales, |
4147 | | - 'True negative rate fixed scales' = true_negative_rate_fixed_scales, 'True negative rate free scales' = true_negative_rate_free_scales, |
4148 | | - 'False positive rate fixed scales' = false_positive_rate_fixed_scales, 'False positive rate free scales' = false_positive_rate_free_scales, |
4149 | | - 'False negative rate fixed scales' = false_negative_rate_fixed_scales, 'False negative rate free scales' = false_negative_rate_free_scales, |
4150 | | - "Duration barchart" = duration_barchart, 'Data summary' = data_summary, 'Correlation matrix' = correlation_marix, |
4151 | | - 'VIF' = VIF, "Stratified sampling report" = stratified_sampling_report, |
4152 | | - 'Boxplots' = boxplots, 'Histograms' = histograms, 'Head of data' = head_df, 'Head of ensemble' = head_ensemble, |
4153 | | - 'Summary tables' = summary_tables, 'Accuracy_plot fixed scales' = accuracy_plot_fixed_scales, 'Accuracy plot free scales' = accuracy_plot_fixed_scales, |
4154 | | - 'Total plot fixed scales' = total_plot_fixed_scales, "Total plot free scales" = total_plot_free_scales, |
4155 | | - 'Classification error fixed scales' = classification_error_fixed_scales, 'Classification error free scales' = classification_error__free_scales, |
4156 | | - 'Residuals fixed scales' = residuals_plot_fixed_scales, 'Residuals free scales' = residuals_plot_free_scales, |
4157 | | - "Holdout vs train fixed scales" = holdout_vs_train_fixed_scales, "Holdout vs train free scales" = holdout_vs_train_free_scales |
| 4173 | + 'Final_results' = summary_report, 'Barchart_values' = barchart, 'Barchart_percent' = barchart_percentage, "Accuracy_Barchart" = accuracy_barchart, "holdout_vs_train_barchart" = holdout_vs_train_barchart, |
| 4174 | + 'True_positive_rate_fixed_scales' = true_positive_rate_fixed_scales, 'True_positive_rate_free_scales' = true_positive_rate_free_scales, |
| 4175 | + 'True_negative_rate_fixed_scales' = true_negative_rate_fixed_scales, 'True_negative_rate_free_scales' = true_negative_rate_free_scales, |
| 4176 | + 'False_positive_rate_fixed_scales' = false_positive_rate_fixed_scales, 'False_positive_rate_free_scales' = false_positive_rate_free_scales, |
| 4177 | + 'False_negative_rate_fixed_scales' = false_negative_rate_fixed_scales, 'False_negative_rate_free_scales' = false_negative_rate_free_scales, |
| 4178 | + "Duration_barchart" = duration_barchart, 'Data_summary' = data_summary, 'Correlation_matrix' = correlation_matrix, |
| 4179 | + 'VIF' = VIF_report, "Stratified sampling report" = stratified_sampling_report, |
| 4180 | + 'Boxplots' = boxplots, 'Histograms' = histograms, 'Head_of_data' = head_df, 'Head_of_ensemble' = head_ensemble, |
| 4181 | + 'Summary_tables' = summary_tables, 'Accuracy_plot_fixed_scales' = accuracy_plot_fixed_scales, 'Accuracy_plot_free_scales' = accuracy_plot_fixed_scales, |
| 4182 | + 'Total_plot_fixed_scales' = total_plot_fixed_scales, "Total_plot_free_scales" = total_plot_free_scales, |
| 4183 | + 'Classification_error_fixed_scales' = classification_error_fixed_scales, 'Classification_error_free_scales' = classification_error__free_scales, |
| 4184 | + 'Residuals_fixed_scales' = residuals_plot_fixed_scales, 'Residuals_free_scales' = residuals_plot_free_scales, |
| 4185 | + "Holdout_vs_train_fixed_scales" = holdout_vs_train_fixed_scales, "Holdout_vs_train_free_scales" = holdout_vs_train_free_scales |
4158 | 4186 | ) |
4159 | 4187 | ) |
4160 | 4188 | } |
0 commit comments