Visualize an object of class `imputation` with one of two kinds of plot, chosen by the type of the imputed variable. The imputation of a numerical variable is drawn as a density plot, and the imputation of a categorical variable is drawn as a bar plot.

# S3 method for imputation
plot(x, typographic = TRUE, base_family = NULL, ...)

Arguments

x

an object of class "imputation", usually a result of a call to imputate_na() or imputate_outlier().

typographic

logical. Whether to apply a theme that focuses on typographic elements to the ggplot2 visualization. The default is TRUE. If TRUE, a base theme that focuses on typographic elements is provided using the hrbrthemes package.

base_family

character. The name of the base font family to use for the visualization. If not specified, the font defined in dlookr is applied. (See details)

...

arguments to be passed to methods, such as graphical parameters (see par). Only applies when the model argument is TRUE, and is used for the ... of the plot.lm() function.

Details

The base_family is selected from "Roboto Condensed", "Liberation Sans Narrow", "NanumSquare", "Noto Sans Korean". If you want to use a different font, use it after loading the Google font with import_google_font().

See also

Examples

# \donttest{ # Generate data for the example heartfailure2 <- heartfailure heartfailure2[sample(seq(NROW(heartfailure2)), 20), "platelets"] <- NA heartfailure2[sample(seq(NROW(heartfailure2)), 5), "smoking"] <- NA # Impute missing values ----------------------------- # If the variable of interest is a numerical variable platelets <- imputate_na(heartfailure2, platelets, death_event, method = "rpart") platelets
#> [1] 265000.0 263358.0 261986.2 210000.0 327000.0 204000.0 127000.0 454000.0 #> [9] 263358.0 388000.0 368000.0 253000.0 136000.0 276000.0 427000.0 47000.0 #> [17] 262000.0 166000.0 237000.0 87000.0 276000.0 297000.0 289000.0 368000.0 #> [25] 263358.0 149000.0 196000.0 284000.0 153000.0 200000.0 263358.0 360000.0 #> [33] 319000.0 302000.0 188000.0 228000.0 226000.0 321000.0 305000.0 329000.0 #> [41] 263358.0 153000.0 185000.0 218000.0 194000.0 310000.0 271000.0 451000.0 #> [49] 265686.5 395000.0 166000.0 418000.0 263358.0 351000.0 255000.0 461000.0 #> [57] 223000.0 216000.0 265686.5 254000.0 390000.0 216000.0 254000.0 385000.0 #> [65] 263358.0 119000.0 213000.0 274000.0 244000.0 497000.0 374000.0 122000.0 #> [73] 243000.0 149000.0 266000.0 204000.0 317000.0 237000.0 283000.0 324000.0 #> [81] 258263.2 263358.0 196000.0 172000.0 302000.0 406000.0 173000.0 304000.0 #> [89] 235000.0 181000.0 249000.0 297000.0 263358.0 210000.0 327000.0 261986.2 #> [97] 254000.0 255000.0 318000.0 221000.0 298000.0 263358.0 149000.0 226000.0 #> [105] 286000.0 621000.0 263000.0 352983.3 304000.0 850000.0 306000.0 186153.8 #> [113] 252000.0 351000.0 328000.0 164000.0 271000.0 261986.2 261986.2 263358.0 #> [121] 210000.0 162000.0 228000.0 265686.5 217000.0 237000.0 271000.0 300000.0 #> [129] 267000.0 227000.0 249000.0 215717.8 263358.0 295000.0 231000.0 263358.0 #> [137] 172000.0 305000.0 221000.0 211000.0 263358.0 348000.0 329000.0 229000.0 #> [145] 338000.0 266000.0 218000.0 242000.0 225000.0 228000.0 235000.0 244000.0 #> [153] 184000.0 263358.0 235000.0 194000.0 277000.0 262000.0 235000.0 362000.0 #> [161] 242000.0 174000.0 448000.0 75000.0 334000.0 192000.0 220000.0 70000.0 #> [169] 270000.0 305000.0 263358.0 325000.0 176000.0 189000.0 281000.0 337000.0 #> [177] 105000.0 132000.0 267000.0 279000.0 303000.0 221000.0 265000.0 224000.0 #> [185] 219000.0 389000.0 153000.0 365000.0 201000.0 275000.0 350000.0 309000.0 #> [193] 260000.0 261986.2 263203.7 223000.0 263358.0 259000.0 279000.0 
263358.0 #> [201] 73000.0 377000.0 220000.0 212000.0 277000.0 362000.0 226000.0 186000.0 #> [209] 283000.0 268000.0 389000.0 147000.0 481000.0 244000.0 290000.0 203000.0 #> [217] 358000.0 151000.0 271000.0 371000.0 263358.0 194000.0 365000.0 130000.0 #> [225] 504000.0 265000.0 189000.0 141000.0 186153.8 274000.0 62000.0 261986.2 #> [233] 255000.0 330000.0 305000.0 406000.0 248000.0 173000.0 257000.0 263358.0 #> [241] 533000.0 249000.0 255000.0 220000.0 264000.0 282000.0 314000.0 246000.0 #> [249] 301000.0 223000.0 404000.0 231000.0 274000.0 236000.0 215717.8 334000.0 #> [257] 294000.0 253000.0 233000.0 308000.0 203000.0 283000.0 261986.2 208000.0 #> [265] 147000.0 362000.0 204081.6 133000.0 302000.0 222000.0 263358.0 221000.0 #> [273] 215000.0 189000.0 150000.0 422000.0 327000.0 25100.0 232000.0 451000.0 #> [281] 241000.0 51000.0 263203.7 263358.0 279000.0 336000.0 279000.0 543000.0 #> [289] 263358.0 390000.0 222000.0 133000.0 382000.0 179000.0 155000.0 270000.0 #> [297] 742000.0 140000.0 261986.2 #> attr(,"var_type") #> [1] "numerical" #> attr(,"method") #> [1] "rpart" #> attr(,"na_pos") #> [1] 3 49 59 81 96 108 112 118 119 124 132 194 195 229 232 255 263 267 283 #> [20] 299 #> attr(,"type") #> [1] "missing values" #> attr(,"message") #> [1] "complete imputation" #> attr(,"success") #> [1] TRUE #> attr(,"class") #> [1] "imputation" "numeric"
summary(platelets)
#> * Impute missing values based on Recursive Partitioning and Regression Trees #> - method : rpart #> #> * Information of Imputation (before vs after) #> Original Imputation #> n 2.790000e+02 2.990000e+02 #> na 2.000000e+01 0.000000e+00 #> mean 2.653309e+05 2.644340e+05 #> sd 9.806222e+04 9.522328e+04 #> se_mean 5.870832e+03 5.506905e+03 #> IQR 8.850000e+04 8.600000e+04 #> skewness 1.470178e+00 1.522491e+00 #> kurtosis 6.458128e+00 6.977392e+00 #> p00 2.510000e+04 2.510000e+04 #> p01 5.958000e+04 6.178000e+04 #> p05 1.329000e+05 1.330000e+05 #> p10 1.530000e+05 1.606000e+05 #> p20 2.006000e+05 2.030000e+05 #> p25 2.160000e+05 2.160000e+05 #> p30 2.224000e+05 2.230000e+05 #> p40 2.422000e+05 2.440000e+05 #> p50 2.633580e+05 2.620000e+05 #> p60 2.670000e+05 2.650000e+05 #> p70 2.896000e+05 2.830000e+05 #> p75 3.045000e+05 3.020000e+05 #> p80 3.244000e+05 3.184000e+05 #> p90 3.746000e+05 3.686000e+05 #> p95 4.225000e+05 4.184000e+05 #> p99 5.601600e+05 5.445600e+05 #> p100 8.500000e+05 8.500000e+05
plot(platelets)
# If the variable of interest is a categorical variable smoking <- imputate_na(heartfailure2, smoking, death_event, method = "mice")
#> #> iter imp variable #> 1 1 platelets smoking #> 1 2 platelets smoking #> 1 3 platelets smoking #> 1 4 platelets smoking #> 1 5 platelets smoking #> 2 1 platelets smoking #> 2 2 platelets smoking #> 2 3 platelets smoking #> 2 4 platelets smoking #> 2 5 platelets smoking #> 3 1 platelets smoking #> 3 2 platelets smoking #> 3 3 platelets smoking #> 3 4 platelets smoking #> 3 5 platelets smoking #> 4 1 platelets smoking #> 4 2 platelets smoking #> 4 3 platelets smoking #> 4 4 platelets smoking #> 4 5 platelets smoking #> 5 1 platelets smoking #> 5 2 platelets smoking #> 5 3 platelets smoking #> 5 4 platelets smoking #> 5 5 platelets smoking
smoking
#> [1] No No Yes No No Yes No Yes No Yes Yes Yes No No No No No No #> [19] No No No No Yes No No Yes No Yes No Yes No No No No No No #> [37] No No No No Yes Yes Yes No No Yes No Yes No No No No No No #> [55] Yes No Yes Yes Yes Yes No No Yes No No Yes No Yes No No Yes Yes #> [73] Yes Yes Yes Yes Yes No Yes No No Yes No No No No No No No No #> [91] Yes No No No No No No No No No No No Yes Yes No Yes No No #> [109] Yes Yes Yes Yes No No No No No No No No Yes No No No No No #> [127] No No Yes No Yes No No Yes Yes No No No No No No No No No #> [145] Yes Yes Yes No No No No No Yes Yes No No No Yes No Yes No Yes #> [163] Yes No No No Yes No No No Yes Yes Yes No Yes Yes Yes No No Yes #> [181] No Yes Yes No Yes No No No No No No No No Yes No No No No #> [199] No Yes No No No Yes Yes No No No No No Yes Yes Yes No No No #> [217] No No No No No Yes Yes No No No No No No No No Yes No Yes #> [235] Yes No No No No Yes No Yes Yes No No No No No Yes No No No #> [253] No No No Yes No No No Yes No No No No No Yes Yes No No No #> [271] Yes No No No Yes No No No No No No Yes Yes Yes No No No No #> [289] No No No No Yes Yes Yes No No Yes Yes #> attr(,"var_type") #> [1] categorical #> attr(,"method") #> [1] mice #> attr(,"na_pos") #> [1] 34 98 175 226 227 #> attr(,"seed") #> [1] 42951 #> attr(,"type") #> [1] missing values #> attr(,"message") #> [1] complete imputation #> attr(,"success") #> [1] TRUE #> Levels: No Yes
summary(smoking)
#> * Impute missing values based on Multivariate Imputation by Chained Equations #> - method : mice #> - random seed : 42951 #> #> * Information of Imputation (before vs after) #> original imputation original_percent imputation_percent #> No 200 204 66.89 68.23 #> Yes 94 95 31.44 31.77 #> <NA> 5 0 1.67 0.00
plot(smoking)
# Impute outliers ---------------------------------- # If the variable of interest is a numerical variable platelets <- imputate_outlier(heartfailure2, platelets, method = "capping") platelets
#> [1] 265000 263358 NA 210000 327000 204000 127000 422500 263358 388000 #> [11] 368000 253000 136000 276000 427000 132900 262000 166000 237000 87000 #> [21] 276000 297000 289000 368000 263358 149000 196000 284000 153000 200000 #> [31] 263358 360000 319000 302000 188000 228000 226000 321000 305000 329000 #> [41] 263358 153000 185000 218000 194000 310000 271000 422500 NA 395000 #> [51] 166000 418000 263358 351000 255000 422500 223000 216000 NA 254000 #> [61] 390000 216000 254000 385000 263358 119000 213000 274000 244000 422500 #> [71] 374000 122000 243000 149000 266000 204000 317000 237000 283000 324000 #> [81] NA 263358 196000 172000 302000 406000 173000 304000 235000 181000 #> [91] 249000 297000 263358 210000 327000 NA 254000 255000 318000 221000 #> [101] 298000 263358 149000 226000 286000 422500 263000 NA 304000 422500 #> [111] 306000 NA 252000 351000 328000 164000 271000 NA NA 263358 #> [121] 210000 162000 228000 NA 217000 237000 271000 300000 267000 227000 #> [131] 249000 NA 263358 295000 231000 263358 172000 305000 221000 211000 #> [141] 263358 348000 329000 229000 338000 266000 218000 242000 225000 228000 #> [151] 235000 244000 184000 263358 235000 194000 277000 262000 235000 362000 #> [161] 242000 174000 422500 132900 334000 192000 220000 132900 270000 305000 #> [171] 263358 325000 176000 189000 281000 337000 105000 132000 267000 279000 #> [181] 303000 221000 265000 224000 219000 389000 153000 365000 201000 275000 #> [191] 350000 309000 260000 NA NA 223000 263358 259000 279000 263358 #> [201] 132900 377000 220000 212000 277000 362000 226000 186000 283000 268000 #> [211] 389000 147000 422500 244000 290000 203000 358000 151000 271000 371000 #> [221] 263358 194000 365000 130000 422500 265000 189000 141000 NA 274000 #> [231] 132900 NA 255000 330000 305000 406000 248000 173000 257000 263358 #> [241] 422500 249000 255000 220000 264000 282000 314000 246000 301000 223000 #> [251] 404000 231000 274000 236000 NA 334000 294000 253000 233000 308000 #> [261] 203000 
283000 NA 208000 147000 362000 NA 133000 302000 222000 #> [271] 263358 221000 215000 189000 150000 422000 327000 132900 232000 422500 #> [281] 241000 132900 NA 263358 279000 336000 279000 422500 263358 390000 #> [291] 222000 133000 382000 179000 155000 270000 422500 140000 NA #> attr(,"method") #> [1] "capping" #> attr(,"var_type") #> [1] "numerical" #> attr(,"outlier_pos") #> [1] 8 16 48 56 70 106 110 163 164 168 201 213 225 231 241 278 280 282 288 #> [20] 297 #> attr(,"outliers") #> [1] 454000 47000 451000 461000 497000 621000 850000 448000 75000 70000 #> [11] 73000 481000 504000 62000 533000 25100 451000 51000 543000 742000 #> attr(,"type") #> [1] "outliers" #> attr(,"message") #> [1] "complete imputation" #> attr(,"success") #> [1] TRUE #> attr(,"class") #> [1] "imputation" "numeric"
summary(platelets)
#> Impute outliers with capping #> #> * Information of Imputation (before vs after) #> Original Imputation #> n 2.790000e+02 2.790000e+02 #> na 2.000000e+01 2.000000e+01 #> mean 2.653309e+05 2.616883e+05 #> sd 9.806222e+04 7.661502e+04 #> se_mean 5.870832e+03 4.586821e+03 #> IQR 8.850000e+04 8.850000e+04 #> skewness 1.470178e+00 2.882524e-01 #> kurtosis 6.458128e+00 -2.998563e-01 #> p00 2.510000e+04 8.700000e+04 #> p01 5.958000e+04 1.213400e+05 #> p05 1.329000e+05 1.329900e+05 #> p10 1.530000e+05 1.530000e+05 #> p20 2.006000e+05 2.006000e+05 #> p25 2.160000e+05 2.160000e+05 #> p30 2.224000e+05 2.224000e+05 #> p40 2.422000e+05 2.422000e+05 #> p50 2.633580e+05 2.633580e+05 #> p60 2.670000e+05 2.670000e+05 #> p70 2.896000e+05 2.896000e+05 #> p75 3.045000e+05 3.045000e+05 #> p80 3.244000e+05 3.244000e+05 #> p90 3.746000e+05 3.746000e+05 #> p95 4.225000e+05 4.220500e+05 #> p99 5.601600e+05 4.225000e+05 #> p100 8.500000e+05 4.270000e+05
plot(platelets)
# }