Visualizes the result of an imputation, choosing one of two kinds of plot according to the attributes of the `imputation` object. A numerical variable is drawn as a density plot, and a categorical variable is drawn as a bar plot.
# S3 method for imputation plot(x, typographic = TRUE, base_family = NULL, ...)
x | an object of class "imputation", usually, a result of a call to imputate_na() or imputate_outlier(). |
---|---|
typographic | logical. Whether to apply a theme that focuses on typographic elements to the ggplot2 visualization. The default is TRUE. If TRUE, a base theme that focuses on typographic elements is provided using the hrbrthemes package. |
base_family | character. The name of the base font family to use for the visualization. If not specified, the font defined in dlookr is applied. (See details) |
... | arguments to be passed to methods, such as graphical parameters (see par). These only apply when the model argument is TRUE, and are used for the ... of the plot.lm() function. |
The base_family is selected from "Roboto Condensed", "Liberation Sans Narrow", "NanumSquare", and "Noto Sans Korean". If you want to use a different font, first load the Google font with import_google_font(), and then specify it.
# \donttest{ # Generate data for the example heartfailure2 <- heartfailure heartfailure2[sample(seq(NROW(heartfailure2)), 20), "platelets"] <- NA heartfailure2[sample(seq(NROW(heartfailure2)), 5), "smoking"] <- NA # Impute missing values ----------------------------- # If the variable of interest is a numerical variables platelets <- imputate_na(heartfailure2, platelets, death_event, method = "rpart") platelets#> [1] 265000.0 263358.0 261986.2 210000.0 327000.0 204000.0 127000.0 454000.0 #> [9] 263358.0 388000.0 368000.0 253000.0 136000.0 276000.0 427000.0 47000.0 #> [17] 262000.0 166000.0 237000.0 87000.0 276000.0 297000.0 289000.0 368000.0 #> [25] 263358.0 149000.0 196000.0 284000.0 153000.0 200000.0 263358.0 360000.0 #> [33] 319000.0 302000.0 188000.0 228000.0 226000.0 321000.0 305000.0 329000.0 #> [41] 263358.0 153000.0 185000.0 218000.0 194000.0 310000.0 271000.0 451000.0 #> [49] 265686.5 395000.0 166000.0 418000.0 263358.0 351000.0 255000.0 461000.0 #> [57] 223000.0 216000.0 265686.5 254000.0 390000.0 216000.0 254000.0 385000.0 #> [65] 263358.0 119000.0 213000.0 274000.0 244000.0 497000.0 374000.0 122000.0 #> [73] 243000.0 149000.0 266000.0 204000.0 317000.0 237000.0 283000.0 324000.0 #> [81] 258263.2 263358.0 196000.0 172000.0 302000.0 406000.0 173000.0 304000.0 #> [89] 235000.0 181000.0 249000.0 297000.0 263358.0 210000.0 327000.0 261986.2 #> [97] 254000.0 255000.0 318000.0 221000.0 298000.0 263358.0 149000.0 226000.0 #> [105] 286000.0 621000.0 263000.0 352983.3 304000.0 850000.0 306000.0 186153.8 #> [113] 252000.0 351000.0 328000.0 164000.0 271000.0 261986.2 261986.2 263358.0 #> [121] 210000.0 162000.0 228000.0 265686.5 217000.0 237000.0 271000.0 300000.0 #> [129] 267000.0 227000.0 249000.0 215717.8 263358.0 295000.0 231000.0 263358.0 #> [137] 172000.0 305000.0 221000.0 211000.0 263358.0 348000.0 329000.0 229000.0 #> [145] 338000.0 266000.0 218000.0 242000.0 225000.0 228000.0 235000.0 244000.0 #> [153] 184000.0 263358.0 235000.0 194000.0 277000.0 262000.0 
235000.0 362000.0 #> [161] 242000.0 174000.0 448000.0 75000.0 334000.0 192000.0 220000.0 70000.0 #> [169] 270000.0 305000.0 263358.0 325000.0 176000.0 189000.0 281000.0 337000.0 #> [177] 105000.0 132000.0 267000.0 279000.0 303000.0 221000.0 265000.0 224000.0 #> [185] 219000.0 389000.0 153000.0 365000.0 201000.0 275000.0 350000.0 309000.0 #> [193] 260000.0 261986.2 263203.7 223000.0 263358.0 259000.0 279000.0 263358.0 #> [201] 73000.0 377000.0 220000.0 212000.0 277000.0 362000.0 226000.0 186000.0 #> [209] 283000.0 268000.0 389000.0 147000.0 481000.0 244000.0 290000.0 203000.0 #> [217] 358000.0 151000.0 271000.0 371000.0 263358.0 194000.0 365000.0 130000.0 #> [225] 504000.0 265000.0 189000.0 141000.0 186153.8 274000.0 62000.0 261986.2 #> [233] 255000.0 330000.0 305000.0 406000.0 248000.0 173000.0 257000.0 263358.0 #> [241] 533000.0 249000.0 255000.0 220000.0 264000.0 282000.0 314000.0 246000.0 #> [249] 301000.0 223000.0 404000.0 231000.0 274000.0 236000.0 215717.8 334000.0 #> [257] 294000.0 253000.0 233000.0 308000.0 203000.0 283000.0 261986.2 208000.0 #> [265] 147000.0 362000.0 204081.6 133000.0 302000.0 222000.0 263358.0 221000.0 #> [273] 215000.0 189000.0 150000.0 422000.0 327000.0 25100.0 232000.0 451000.0 #> [281] 241000.0 51000.0 263203.7 263358.0 279000.0 336000.0 279000.0 543000.0 #> [289] 263358.0 390000.0 222000.0 133000.0 382000.0 179000.0 155000.0 270000.0 #> [297] 742000.0 140000.0 261986.2 #> attr(,"var_type") #> [1] "numerical" #> attr(,"method") #> [1] "rpart" #> attr(,"na_pos") #> [1] 3 49 59 81 96 108 112 118 119 124 132 194 195 229 232 255 263 267 283 #> [20] 299 #> attr(,"type") #> [1] "missing values" #> attr(,"message") #> [1] "complete imputation" #> attr(,"success") #> [1] TRUE #> attr(,"class") #> [1] "imputation" "numeric"summary(platelets)#> * Impute missing values based on Recursive Partitioning and Regression Trees #> - method : rpart #> #> * Information of Imputation (before vs after) #> Original Imputation #> n 2.790000e+02 2.990000e+02 
#> na 2.000000e+01 0.000000e+00 #> mean 2.653309e+05 2.644340e+05 #> sd 9.806222e+04 9.522328e+04 #> se_mean 5.870832e+03 5.506905e+03 #> IQR 8.850000e+04 8.600000e+04 #> skewness 1.470178e+00 1.522491e+00 #> kurtosis 6.458128e+00 6.977392e+00 #> p00 2.510000e+04 2.510000e+04 #> p01 5.958000e+04 6.178000e+04 #> p05 1.329000e+05 1.330000e+05 #> p10 1.530000e+05 1.606000e+05 #> p20 2.006000e+05 2.030000e+05 #> p25 2.160000e+05 2.160000e+05 #> p30 2.224000e+05 2.230000e+05 #> p40 2.422000e+05 2.440000e+05 #> p50 2.633580e+05 2.620000e+05 #> p60 2.670000e+05 2.650000e+05 #> p70 2.896000e+05 2.830000e+05 #> p75 3.045000e+05 3.020000e+05 #> p80 3.244000e+05 3.184000e+05 #> p90 3.746000e+05 3.686000e+05 #> p95 4.225000e+05 4.184000e+05 #> p99 5.601600e+05 5.445600e+05 #> p100 8.500000e+05 8.500000e+05plot(platelets)# If the variable of interest is a categorical variables smoking <- imputate_na(heartfailure2, smoking, death_event, method = "mice")#> #> iter imp variable #> 1 1 platelets smoking #> 1 2 platelets smoking #> 1 3 platelets smoking #> 1 4 platelets smoking #> 1 5 platelets smoking #> 2 1 platelets smoking #> 2 2 platelets smoking #> 2 3 platelets smoking #> 2 4 platelets smoking #> 2 5 platelets smoking #> 3 1 platelets smoking #> 3 2 platelets smoking #> 3 3 platelets smoking #> 3 4 platelets smoking #> 3 5 platelets smoking #> 4 1 platelets smoking #> 4 2 platelets smoking #> 4 3 platelets smoking #> 4 4 platelets smoking #> 4 5 platelets smoking #> 5 1 platelets smoking #> 5 2 platelets smoking #> 5 3 platelets smoking #> 5 4 platelets smoking #> 5 5 platelets smokingsmoking#> [1] No No Yes No No Yes No Yes No Yes Yes Yes No No No No No No #> [19] No No No No Yes No No Yes No Yes No Yes No No No No No No #> [37] No No No No Yes Yes Yes No No Yes No Yes No No No No No No #> [55] Yes No Yes Yes Yes Yes No No Yes No No Yes No Yes No No Yes Yes #> [73] Yes Yes Yes Yes Yes No Yes No No Yes No No No No No No No No #> [91] Yes No No No No No No No No No No No Yes 
Yes No Yes No No #> [109] Yes Yes Yes Yes No No No No No No No No Yes No No No No No #> [127] No No Yes No Yes No No Yes Yes No No No No No No No No No #> [145] Yes Yes Yes No No No No No Yes Yes No No No Yes No Yes No Yes #> [163] Yes No No No Yes No No No Yes Yes Yes No Yes Yes Yes No No Yes #> [181] No Yes Yes No Yes No No No No No No No No Yes No No No No #> [199] No Yes No No No Yes Yes No No No No No Yes Yes Yes No No No #> [217] No No No No No Yes Yes No No No No No No No No Yes No Yes #> [235] Yes No No No No Yes No Yes Yes No No No No No Yes No No No #> [253] No No No Yes No No No Yes No No No No No Yes Yes No No No #> [271] Yes No No No Yes No No No No No No Yes Yes Yes No No No No #> [289] No No No No Yes Yes Yes No No Yes Yes #> attr(,"var_type") #> [1] categorical #> attr(,"method") #> [1] mice #> attr(,"na_pos") #> [1] 34 98 175 226 227 #> attr(,"seed") #> [1] 42951 #> attr(,"type") #> [1] missing values #> attr(,"message") #> [1] complete imputation #> attr(,"success") #> [1] TRUE #> Levels: No Yessummary(smoking)#> * Impute missing values based on Multivariate Imputation by Chained Equations #> - method : mice #> - random seed : 42951 #> #> * Information of Imputation (before vs after) #> original imputation original_percent imputation_percent #> No 200 204 66.89 68.23 #> Yes 94 95 31.44 31.77 #> <NA> 5 0 1.67 0.00plot(smoking)# Impute outliers ---------------------------------- # If the variable of interest is a numerical variable platelets <- imputate_outlier(heartfailure2, platelets, method = "capping") platelets#> [1] 265000 263358 NA 210000 327000 204000 127000 422500 263358 388000 #> [11] 368000 253000 136000 276000 427000 132900 262000 166000 237000 87000 #> [21] 276000 297000 289000 368000 263358 149000 196000 284000 153000 200000 #> [31] 263358 360000 319000 302000 188000 228000 226000 321000 305000 329000 #> [41] 263358 153000 185000 218000 194000 310000 271000 422500 NA 395000 #> [51] 166000 418000 263358 351000 255000 422500 223000 
216000 NA 254000 #> [61] 390000 216000 254000 385000 263358 119000 213000 274000 244000 422500 #> [71] 374000 122000 243000 149000 266000 204000 317000 237000 283000 324000 #> [81] NA 263358 196000 172000 302000 406000 173000 304000 235000 181000 #> [91] 249000 297000 263358 210000 327000 NA 254000 255000 318000 221000 #> [101] 298000 263358 149000 226000 286000 422500 263000 NA 304000 422500 #> [111] 306000 NA 252000 351000 328000 164000 271000 NA NA 263358 #> [121] 210000 162000 228000 NA 217000 237000 271000 300000 267000 227000 #> [131] 249000 NA 263358 295000 231000 263358 172000 305000 221000 211000 #> [141] 263358 348000 329000 229000 338000 266000 218000 242000 225000 228000 #> [151] 235000 244000 184000 263358 235000 194000 277000 262000 235000 362000 #> [161] 242000 174000 422500 132900 334000 192000 220000 132900 270000 305000 #> [171] 263358 325000 176000 189000 281000 337000 105000 132000 267000 279000 #> [181] 303000 221000 265000 224000 219000 389000 153000 365000 201000 275000 #> [191] 350000 309000 260000 NA NA 223000 263358 259000 279000 263358 #> [201] 132900 377000 220000 212000 277000 362000 226000 186000 283000 268000 #> [211] 389000 147000 422500 244000 290000 203000 358000 151000 271000 371000 #> [221] 263358 194000 365000 130000 422500 265000 189000 141000 NA 274000 #> [231] 132900 NA 255000 330000 305000 406000 248000 173000 257000 263358 #> [241] 422500 249000 255000 220000 264000 282000 314000 246000 301000 223000 #> [251] 404000 231000 274000 236000 NA 334000 294000 253000 233000 308000 #> [261] 203000 283000 NA 208000 147000 362000 NA 133000 302000 222000 #> [271] 263358 221000 215000 189000 150000 422000 327000 132900 232000 422500 #> [281] 241000 132900 NA 263358 279000 336000 279000 422500 263358 390000 #> [291] 222000 133000 382000 179000 155000 270000 422500 140000 NA #> attr(,"method") #> [1] "capping" #> attr(,"var_type") #> [1] "numerical" #> attr(,"outlier_pos") #> [1] 8 16 48 56 70 106 110 163 164 168 201 213 225 231 241 278 
280 282 288 #> [20] 297 #> attr(,"outliers") #> [1] 454000 47000 451000 461000 497000 621000 850000 448000 75000 70000 #> [11] 73000 481000 504000 62000 533000 25100 451000 51000 543000 742000 #> attr(,"type") #> [1] "outliers" #> attr(,"message") #> [1] "complete imputation" #> attr(,"success") #> [1] TRUE #> attr(,"class") #> [1] "imputation" "numeric"summary(platelets)#> Impute outliers with capping #> #> * Information of Imputation (before vs after) #> Original Imputation #> n 2.790000e+02 2.790000e+02 #> na 2.000000e+01 2.000000e+01 #> mean 2.653309e+05 2.616883e+05 #> sd 9.806222e+04 7.661502e+04 #> se_mean 5.870832e+03 4.586821e+03 #> IQR 8.850000e+04 8.850000e+04 #> skewness 1.470178e+00 2.882524e-01 #> kurtosis 6.458128e+00 -2.998563e-01 #> p00 2.510000e+04 8.700000e+04 #> p01 5.958000e+04 1.213400e+05 #> p05 1.329000e+05 1.329900e+05 #> p10 1.530000e+05 1.530000e+05 #> p20 2.006000e+05 2.006000e+05 #> p25 2.160000e+05 2.160000e+05 #> p30 2.224000e+05 2.224000e+05 #> p40 2.422000e+05 2.422000e+05 #> p50 2.633580e+05 2.633580e+05 #> p60 2.670000e+05 2.670000e+05 #> p70 2.896000e+05 2.896000e+05 #> p75 3.045000e+05 3.045000e+05 #> p80 3.244000e+05 3.244000e+05 #> p90 3.746000e+05 3.746000e+05 #> p95 4.225000e+05 4.220500e+05 #> p99 5.601600e+05 4.225000e+05 #> p100 8.500000e+05 4.270000e+05plot(platelets)# }