Print and summary methods for the "imputation" class.
# S3 method for imputation
summary(object, ...)
object: an object of class "imputation", usually the result of a call to imputate_na() or imputate_outlier().
...: further arguments passed to or from other methods.
summary.imputation() tries to be smart about formatting two kinds of imputation.
See also: imputate_na, imputate_outlier, summary.imputation.
# Generate data for the example
heartfailure2 <- heartfailure
heartfailure2[sample(seq(NROW(heartfailure2)), 20), "platelets"] <- NA
heartfailure2[sample(seq(NROW(heartfailure2)), 5), "smoking"] <- NA
# Impute missing values -----------------------------
# If the variable of interest is a numerical variable
platelets <- imputate_na(heartfailure2, platelets, yvar = death_event, method = "rpart")
summary(platelets)
#> * Impute missing values based on Recursive Partitioning and Regression Trees
#> - method : rpart
#>
#> * Information of Imputation (before vs after)
#> Original Imputation
#> described_variables "value" "value"
#> n "279" "299"
#> na "20" " 0"
#> mean "263003.5" "263166.1"
#> sd "99157.03" "96308.95"
#> se_mean "5936.376" "5569.691"
#> IQR "90500" "85500"
#> skewness "1.509088" "1.537292"
#> kurtosis "6.322138" "6.758119"
#> p00 "25100" "25100"
#> p01 "59580" "61780"
#> p05 "129700" "131800"
#> p10 "153000" "159000"
#> p20 "195200" "202200"
#> p25 "211500" "216000"
#> p30 "221000" "223000"
#> p40 "237000" "237000"
#> p50 "260000" "257000"
#> p60 "265000" "265000"
#> p70 "283600.0" "282637.8"
#> p75 "302000" "301500"
#> p80 "318400" "317400"
#> p90 "371600" "368000"
#> p95 "429100" "422500"
#> p99 "560160" "544560"
#> p100 "850000" "850000"
# If the variable of interest is a categorical variable
smoking <- imputate_na(heartfailure2, smoking, yvar = death_event, method = "rpart")
summary(smoking)
#> * Impute missing values based on Recursive Partitioning and Regression Trees
#> - method : rpart
#>
#> * Information of Imputation (before vs after)
#> original imputation original_percent imputation_percent
#> No 200 204 66.89 68.23
#> Yes 94 95 31.44 31.77
#> <NA> 5 0 1.67 0.00
# Impute outliers ----------------------------------
# If the variable of interest is a numerical variable
platelets <- imputate_outlier(heartfailure2, platelets, method = "capping")
summary(platelets)
#> Impute outliers with capping
#>
#> * Information of Imputation (before vs after)
#> Original Imputation
#> described_variables "value" "value"
#> n "279" "279"
#> na "20" "20"
#> mean "263003.5" "259308.9"
#> sd "99157.03" "77943.41"
#> se_mean "5936.376" "4666.350"
#> IQR "90500" "90500"
#> skewness "1.5090881" "0.3714559"
#> kurtosis " 6.3221379" "-0.1898946"
#> p00 "25100" "87000"
#> p01 " 59580" "121340"
#> p05 "129700" "129970"
#> p10 "153000" "153000"
#> p20 "195200" "195200"
#> p25 "211500" "211500"
#> p30 "221000" "221000"
#> p40 "237000" "237000"
#> p50 "260000" "260000"
#> p60 "265000" "265000"
#> p70 "283600" "283600"
#> p75 "302000" "302000"
#> p80 "318400" "318400"
#> p90 "371600" "371600"
#> p95 "429100" "427210"
#> p99 "560160" "429100"
#> p100 "850000" "429100"