Performs variable transformation for standardization and resolving skewness of numerical variables.

transform(
  x,
  method = c("zscore", "minmax", "log", "log+1", "sqrt", "1/x", "x^2", "x^3", "Box-Cox",
    "Yeo-Johnson")
)

Arguments

x

numeric vector for transformation.

method

method of transformation to apply. The supported methods are described under Value.

Value

An object of transform class. Attributes of transform class are as follows.

  • method : method used to transform the data.

    • Standardization

      • "zscore" : z-score transformation. (x - mu) / sigma

      • "minmax" : minmax transformation. (x - min) / (max - min)

    • Resolving Skewness

      • "log" : log transformation. log(x)

      • "log+1" : log transformation. log(x + 1). Used for values that contain 0.

      • "sqrt" : square root transformation.

      • "1/x" : 1 / x transformation

      • "x^2" : x square transformation

      • "x^3" : x cube transformation

      • "Box-Cox" : Box-Cox transformation

      • "Yeo-Johnson" : Yeo-Johnson transformation

Details

transform() creates an object of the `transform` class. The `transform` class includes the original data, the transformed data, and the method of transformation.

See vignette("transformation") for an introduction to these concepts.

Examples

# \donttest{
# Standardization ------------------------------
creatinine_minmax <- transform(heartfailure$creatinine, method = "minmax")
creatinine_minmax
#>   [1] 0.15730337 0.06741573 0.08988764 0.15730337 0.24719101 0.17977528
#>   [7] 0.07865169 0.06741573 0.11235955 1.00000000 0.39325843 0.04494382
#>  [13] 0.06741573 0.06741573 0.05617978 0.08988764 0.04494382 0.03370787
#>  [19] 0.05617978 0.15730337 0.08988764 0.12359551 0.04494382 0.03370787
#>  [25] 0.14943820 0.15730337 0.05617978 0.08988764 0.59550562 0.07865169
#>  [31] 0.14943820 0.28089888 0.05617978 0.07865169 0.05617978 0.33707865
#>  [37] 0.05617978 0.05617978 0.20224719 0.28089888 0.14943820 0.07865169
#>  [43] 0.07865169 0.05617978 0.06741573 0.15730337 0.04494382 0.01123596
#>  [49] 0.43820225 0.05617978 0.05617978 0.10112360 0.70786517 0.05617978
#>  [55] 0.19101124 0.16853933 0.24719101 0.01123596 0.06741573 0.08988764
#>  [61] 0.05617978 0.20224719 0.06741573 0.05617978 0.07640449 0.26966292
#>  [67] 0.08988764 0.05617978 0.07865169 0.14943820 0.03370787 0.04494382
#>  [73] 0.05617978 0.08988764 0.07865169 0.02247191 0.03370787 0.07865169
#>  [79] 0.01123596 0.04494382 0.13483146 0.07640449 0.22471910 0.14606742
#>  [85] 0.05617978 0.02247191 0.06741573 0.03370787 0.02247191 0.06741573
#>  [91] 0.03370787 0.05617978 0.07640449 0.13483146 0.02247191 0.05617978
#>  [97] 0.08988764 0.06741573 0.07865169 0.06741573 0.06741573 0.07640449
#> [103] 0.06741573 0.05617978 0.20224719 0.13483146 0.08988764 0.04494382
#> [109] 0.06741573 0.08988764 0.07865169 0.07865169 0.12359551 0.08988764
#> [115] 0.07865169 0.05617978 0.02247191 0.30337079 0.04494382 0.14943820
#> [121] 0.11235955 0.05617978 0.02808989 0.04494382 0.35955056 0.08988764
#> [127] 0.17977528 0.03370787 0.02247191 0.32584270 0.02247191 0.62921348
#> [133] 0.07640449 0.08988764 0.07640449 0.07640449 0.04494382 0.17977528
#> [139] 0.05617978 0.03370787 0.06741573 0.04494382 0.04494382 0.04494382
#> [145] 0.13483146 0.02247191 0.02247191 0.05617978 0.14943820 0.04494382
#> [151] 0.22471910 0.04494382 0.04494382 0.07640449 0.03370787 0.13483146
#> [157] 0.10112360 0.05617978 0.08988764 0.06741573 0.07865169 0.03370787
#> [163] 0.04494382 0.04494382 0.06741573 0.08988764 0.02247191 0.21348315
#> [169] 0.05617978 0.03370787 0.11235955 0.04494382 0.06741573 0.03370787
#> [175] 0.04494382 0.05617978 0.05617978 0.05617978 0.07865169 0.02247191
#> [181] 0.04494382 0.05617978 0.07865169 0.22471910 0.07865169 0.11235955
#> [187] 0.01123596 0.17977528 0.05617978 0.04494382 0.17977528 0.11235955
#> [193] 0.02247191 0.07640449 0.12359551 0.14606742 0.07640449 0.03370787
#> [199] 0.05617978 0.14606742 0.02247191 0.05617978 0.04494382 0.33707865
#> [205] 0.02247191 0.05617978 0.03370787 0.04494382 0.05617978 0.03370787
#> [211] 0.05617978 0.03370787 0.10112360 0.12359551 0.03370787 0.08988764
#> [217] 0.04494382 0.95505618 0.06741573 0.02247191 0.14943820 0.06741573
#> [223] 0.06741573 0.03370787 0.05617978 0.10112360 0.08988764 0.05617978
#> [229] 0.50561798 0.07865169 0.13483146 0.06741573 0.04494382 0.10112360
#> [235] 0.06741573 0.06741573 0.06741573 0.07865169 0.05617978 0.07640449
#> [241] 0.08988764 0.08988764 0.06741573 0.04494382 0.14606742 0.10112360
#> [247] 0.06741573 0.21348315 0.05617978 0.07865169 0.00000000 0.03370787
#> [253] 0.05617978 0.07865169 0.05617978 0.05617978 0.13483146 0.05617978
#> [259] 0.03370787 0.02247191 0.05617978 0.02247191 0.10112360 0.05617978
#> [265] 0.07865169 0.04494382 0.14943820 0.13483146 0.04494382 0.05617978
#> [271] 0.12359551 0.04494382 0.07865169 0.02247191 0.05617978 0.03370787
#> [277] 0.06741573 0.06741573 0.02247191 0.08988764 0.05617978 0.24719101
#> [283] 0.37078652 0.06741573 0.03370787 0.07865169 0.13483146 0.05617978
#> [289] 0.06741573 0.04494382 0.03370787 0.10112360 0.05617978 0.04494382
#> [295] 0.06741573 0.07865169 0.03370787 0.10112360 0.12359551
summary(creatinine_minmax)
#> * Standardization with minmax
#> 
#> * Information of Transformation (before vs after)
#>              Original Transformation
#> n        299.00000000   2.990000e+02
#> na         0.00000000   0.000000e+00
#> mean       1.39387960   1.004359e-01
#> sd         1.03451006   1.162371e-01
#> se_mean    0.05982726   6.722164e-03
#> IQR        0.50000000   5.617978e-02
#> skewness   4.45599588   4.455996e+00
#> kurtosis  25.82823866   2.582824e+01
#> p00        0.50000000   0.000000e+00
#> p01        0.60000000   1.123596e-02
#> p05        0.70000000   2.247191e-02
#> p10        0.80000000   3.370787e-02
#> p20        0.90000000   4.494382e-02
#> p25        0.90000000   4.494382e-02
#> p30        1.00000000   5.617978e-02
#> p40        1.00000000   5.617978e-02
#> p50        1.10000000   6.741573e-02
#> p60        1.20000000   7.865169e-02
#> p70        1.30000000   8.988764e-02
#> p75        1.40000000   1.011236e-01
#> p80        1.70000000   1.348315e-01
#> p90        2.10000000   1.797753e-01
#> p95        3.00000000   2.808989e-01
#> p99        6.11400000   6.307865e-01
#> p100       9.40000000   1.000000e+00
plot(creatinine_minmax)


# Resolving Skewness  --------------------------
creatinine_log <- transform(heartfailure$creatinine, method = "log")
creatinine_log
#>   [1]  0.64185389  0.09531018  0.26236426  0.64185389  0.99325177  0.74193734
#>   [7]  0.18232156  0.09531018  0.40546511  2.24070969  1.38629436 -0.10536052
#>  [13]  0.09531018  0.09531018  0.00000000  0.26236426 -0.10536052 -0.22314355
#>  [19]  0.00000000  0.64185389  0.26236426  0.47000363 -0.10536052 -0.22314355
#>  [25]  0.60431597  0.64185389  0.00000000  0.26236426  1.75785792  0.18232156
#>  [31]  0.60431597  1.09861229  0.00000000  0.18232156  0.00000000  1.25276297
#>  [37]  0.00000000  0.00000000  0.83290912  1.09861229  0.60431597  0.18232156
#>  [43]  0.18232156  0.00000000  0.09531018  0.64185389 -0.10536052 -0.51082562
#>  [49]  1.48160454  0.00000000  0.00000000  0.33647224  1.91692261  0.00000000
#>  [55]  0.78845736  0.69314718  0.99325177 -0.51082562  0.09531018  0.26236426
#>  [61]  0.00000000  0.83290912  0.09531018  0.00000000  0.16551444  1.06471074
#>  [67]  0.26236426  0.00000000  0.18232156  0.60431597 -0.22314355 -0.10536052
#>  [73]  0.00000000  0.26236426  0.18232156 -0.35667494 -0.22314355  0.18232156
#>  [79] -0.51082562 -0.10536052  0.53062825  0.16551444  0.91629073  0.58778666
#>  [85]  0.00000000 -0.35667494  0.09531018 -0.22314355 -0.35667494  0.09531018
#>  [91] -0.22314355  0.00000000  0.16551444  0.53062825 -0.35667494  0.00000000
#>  [97]  0.26236426  0.09531018  0.18232156  0.09531018  0.09531018  0.16551444
#> [103]  0.09531018  0.00000000  0.83290912  0.53062825  0.26236426 -0.10536052
#> [109]  0.09531018  0.26236426  0.18232156  0.18232156  0.47000363  0.26236426
#> [115]  0.18232156  0.00000000 -0.35667494  1.16315081 -0.10536052  0.60431597
#> [121]  0.40546511  0.00000000 -0.28768207 -0.10536052  1.30833282  0.26236426
#> [127]  0.74193734 -0.22314355 -0.35667494  1.22377543 -0.35667494  1.80828877
#> [133]  0.16551444  0.26236426  0.16551444  0.16551444 -0.10536052  0.74193734
#> [139]  0.00000000 -0.22314355  0.09531018 -0.10536052 -0.10536052 -0.10536052
#> [145]  0.53062825 -0.35667494 -0.35667494  0.00000000  0.60431597 -0.10536052
#> [151]  0.91629073 -0.10536052 -0.10536052  0.16551444 -0.22314355  0.53062825
#> [157]  0.33647224  0.00000000  0.26236426  0.09531018  0.18232156 -0.22314355
#> [163] -0.10536052 -0.10536052  0.09531018  0.26236426 -0.35667494  0.87546874
#> [169]  0.00000000 -0.22314355  0.40546511 -0.10536052  0.09531018 -0.22314355
#> [175] -0.10536052  0.00000000  0.00000000  0.00000000  0.18232156 -0.35667494
#> [181] -0.10536052  0.00000000  0.18232156  0.91629073  0.18232156  0.40546511
#> [187] -0.51082562  0.74193734  0.00000000 -0.10536052  0.74193734  0.40546511
#> [193] -0.35667494  0.16551444  0.47000363  0.58778666  0.16551444 -0.22314355
#> [199]  0.00000000  0.58778666 -0.35667494  0.00000000 -0.10536052  1.25276297
#> [205] -0.35667494  0.00000000 -0.22314355 -0.10536052  0.00000000 -0.22314355
#> [211]  0.00000000 -0.22314355  0.33647224  0.47000363 -0.22314355  0.26236426
#> [217] -0.10536052  2.19722458  0.09531018 -0.35667494  0.60431597  0.09531018
#> [223]  0.09531018 -0.22314355  0.00000000  0.33647224  0.26236426  0.00000000
#> [229]  1.60943791  0.18232156  0.53062825  0.09531018 -0.10536052  0.33647224
#> [235]  0.09531018  0.09531018  0.09531018  0.18232156  0.00000000  0.16551444
#> [241]  0.26236426  0.26236426  0.09531018 -0.10536052  0.58778666  0.33647224
#> [247]  0.09531018  0.87546874  0.00000000  0.18232156 -0.69314718 -0.22314355
#> [253]  0.00000000  0.18232156  0.00000000  0.00000000  0.53062825  0.00000000
#> [259] -0.22314355 -0.35667494  0.00000000 -0.35667494  0.33647224  0.00000000
#> [265]  0.18232156 -0.10536052  0.60431597  0.53062825 -0.10536052  0.00000000
#> [271]  0.47000363 -0.10536052  0.18232156 -0.35667494  0.00000000 -0.22314355
#> [277]  0.09531018  0.09531018 -0.35667494  0.26236426  0.00000000  0.99325177
#> [283]  1.33500107  0.09531018 -0.22314355  0.18232156  0.53062825  0.00000000
#> [289]  0.09531018 -0.10536052 -0.22314355  0.33647224  0.00000000 -0.10536052
#> [295]  0.09531018  0.18232156 -0.22314355  0.33647224  0.47000363
summary(creatinine_log)
#> * Resolving Skewness with log
#> 
#> * Information of Transformation (before vs after)
#>              Original Transformation
#> n        299.00000000   299.00000000
#> na         0.00000000     0.00000000
#> mean       1.39387960     0.19858693
#> sd         1.03451006     0.45310789
#> se_mean    0.05982726     0.02620391
#> IQR        0.50000000     0.44183275
#> skewness   4.45599588     1.58398978
#> kurtosis  25.82823866     3.60164397
#> p00        0.50000000    -0.69314718
#> p01        0.60000000    -0.51082562
#> p05        0.70000000    -0.35667494
#> p10        0.80000000    -0.22314355
#> p20        0.90000000    -0.10536052
#> p25        0.90000000    -0.10536052
#> p30        1.00000000     0.00000000
#> p40        1.00000000     0.00000000
#> p50        1.10000000     0.09531018
#> p60        1.20000000     0.18232156
#> p70        1.30000000     0.26236426
#> p75        1.40000000     0.33647224
#> p80        1.70000000     0.53062825
#> p90        2.10000000     0.74193734
#> p95        3.00000000     1.09861229
#> p99        6.11400000     1.81046145
#> p100       9.40000000     2.24070969

plot(creatinine_log)


plot(creatinine_log, typographic = FALSE)


# Using dplyr ----------------------------------
library(dplyr)

heartfailure %>%
  mutate(creatinine_log = transform(creatinine, method = "log+1")) %>%
  lm(sodium ~ creatinine_log, data = .)
#> 
#> Call:
#> lm(formula = sodium ~ creatinine_log, data = .)
#> 
#> Coefficients:
#>    (Intercept)  creatinine_log  
#>        139.738          -3.791  
#> 
# }