Summary method for objects of class "correlate".
# S3 method for correlate
summary(object, ...)
object: an object of class "correlate", usually the result of a call to correlate().
...: further arguments passed to or from other methods.
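summary.correlate() prints the correlation type, variable type and correlation method, then reshapes the pairwise correlation coefficients into a matrix of variables by variables. The matrix is also returned; for grouped data the result is a list with one matrix per group.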
# \donttest{
library(dplyr)
# Correlation type is "generic" ===============================
# Correlation coefficients of all numerical variables
corr_tab <- correlate(heartfailure)
# Print the result: a long-format tibble with one row per ordered pair of
# variables (var1, var2) and their coefficient (coef_corr)
corr_tab
#> # A tibble: 42 × 3
#> var1 var2 coef_corr
#> <fct> <fct> <dbl>
#> 1 cpk_enzyme age -0.0814
#> 2 ejection_fraction age 0.0602
#> 3 platelets age -0.0525
#> 4 creatinine age 0.159
#> 5 sodium age -0.0459
#> 6 time age -0.224
#> 7 age cpk_enzyme -0.0814
#> 8 ejection_fraction cpk_enzyme -0.0441
#> 9 platelets cpk_enzyme 0.0245
#> 10 creatinine cpk_enzyme -0.0164
#> # … with 32 more rows
# Summarise the "correlate" object: the call prints the correlation type,
# variable type, method and the correlation matrix; the matrix itself is
# kept in mat
mat <- summary(corr_tab)
#> * correlation type : generic
#> * variable type : numeric
#> * correlation method : pearson
#>
#> * Matrix of Correlation
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.081406394 0.06019547 -0.05247529
#> cpk_enzyme -0.08140639 1.000000000 -0.04407955 0.02446339
#> ejection_fraction 0.06019547 -0.044079554 1.00000000 0.07217747
#> platelets -0.05247529 0.024463389 0.07217747 1.00000000
#> creatinine 0.15923697 -0.016408480 -0.01130247 -0.04119808
#> sodium -0.04591178 0.059550156 0.17590228 0.06212462
#> time -0.22426485 -0.009345653 0.04172924 0.01051391
#> creatinine sodium time
#> age 0.15923697 -0.04591178 -0.224264849
#> cpk_enzyme -0.01640848 0.05955016 -0.009345653
#> ejection_fraction -0.01130247 0.17590228 0.041729235
#> platelets -0.04119808 0.06212462 0.010513909
#> creatinine 1.00000000 -0.18909521 -0.149315418
#> sodium -0.18909521 1.00000000 0.087640000
#> time -0.14931542 0.08764000 1.000000000
# The returned value is the correlation matrix itself
mat
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.081406394 0.06019547 -0.05247529
#> cpk_enzyme -0.08140639 1.000000000 -0.04407955 0.02446339
#> ejection_fraction 0.06019547 -0.044079554 1.00000000 0.07217747
#> platelets -0.05247529 0.024463389 0.07217747 1.00000000
#> creatinine 0.15923697 -0.016408480 -0.01130247 -0.04119808
#> sodium -0.04591178 0.059550156 0.17590228 0.06212462
#> time -0.22426485 -0.009345653 0.04172924 0.01051391
#> creatinine sodium time
#> age 0.15923697 -0.04591178 -0.224264849
#> cpk_enzyme -0.01640848 0.05955016 -0.009345653
#> ejection_fraction -0.01130247 0.17590228 0.041729235
#> platelets -0.04119808 0.06212462 0.010513909
#> creatinine 1.00000000 -0.18909521 -0.149315418
#> sodium -0.18909521 1.00000000 0.087640000
#> time -0.14931542 0.08764000 1.000000000
# Select the variables to compute: var1 is restricted to creatinine and
# sodium, each still paired with every other numerical variable as var2
corr_tab <- correlate(heartfailure, creatinine, sodium)
corr_tab
#> # A tibble: 12 × 3
#> var1 var2 coef_corr
#> <fct> <fct> <dbl>
#> 1 creatinine age 0.159
#> 2 sodium age -0.0459
#> 3 creatinine cpk_enzyme -0.0164
#> 4 sodium cpk_enzyme 0.0596
#> 5 creatinine ejection_fraction -0.0113
#> 6 sodium ejection_fraction 0.176
#> 7 creatinine platelets -0.0412
#> 8 sodium platelets 0.0621
#> 9 sodium creatinine -0.189
#> 10 creatinine sodium -0.189
#> 11 creatinine time -0.149
#> 12 sodium time 0.0876
# Summarise the "correlate" object: only the selected variables appear as
# rows of the printed (and returned) matrix
mat <- summary(corr_tab)
#> * correlation type : generic
#> * variable type : numeric
#> * correlation method : pearson
#>
#> * Matrix of Correlation
#> age cpk_enzyme ejection_fraction platelets creatinine
#> creatinine 0.15923697 -0.01640848 -0.01130247 -0.04119808 1.0000000
#> sodium -0.04591178 0.05955016 0.17590228 0.06212462 -0.1890952
#> sodium time
#> creatinine -0.1890952 -0.1493154
#> sodium 1.0000000 0.0876400
# The returned value is the 2-row correlation matrix
mat
#> age cpk_enzyme ejection_fraction platelets creatinine
#> creatinine 0.15923697 -0.01640848 -0.01130247 -0.04119808 1.0000000
#> sodium -0.04591178 0.05955016 0.17590228 0.06212462 -0.1890952
#> sodium time
#> creatinine -0.1890952 -0.1493154
#> sodium 1.0000000 0.0876400
# Correlation type is "group" ===============================
##-----------------------------------------------------------
# When the data are grouped by categorical variables (here with
# dplyr::group_by), the coefficients are computed within each group
corr_tab <- heartfailure %>%
  group_by(smoking, death_event) %>%
  correlate()

# The grouping columns are carried along in the result
corr_tab
#> # A tibble: 168 × 5
#> smoking death_event var1 var2 coef_corr
#> <fct> <fct> <fct> <fct> <dbl>
#> 1 No No cpk_enzyme age -0.0393
#> 2 No No ejection_fraction age 0.0749
#> 3 No No platelets age -0.0579
#> 4 No No creatinine age 0.199
#> 5 No No sodium age -0.0427
#> 6 No No time age -0.0193
#> 7 No No age cpk_enzyme -0.0393
#> 8 No No ejection_fraction cpk_enzyme -0.0819
#> 9 No No platelets cpk_enzyme 0.0610
#> 10 No No creatinine cpk_enzyme -0.0339
#> # … with 158 more rows
# Summarise the grouped "correlate" object: the header also reports the
# grouping variables, and the result is a list holding one correlation
# matrix per combination of group levels
mat <- summary(corr_tab)
#> * correlation type : group
#> * variable type : numeric
#> * correlation method : pearson
#> * grouped variable : smoking death_event
#>
#> * Matrix of Correlation
#> $`smoking==No, death_event==No`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.03932533 0.07494209 -0.057933219
#> cpk_enzyme -0.03932533 1.00000000 -0.08191008 0.060955621
#> ejection_fraction 0.07494209 -0.08191008 1.00000000 0.128436628
#> platelets -0.05793322 0.06095562 0.12843663 1.000000000
#> creatinine 0.19903460 -0.03387538 -0.07321643 0.008958006
#> sodium -0.04266646 -0.02648112 0.06755437 -0.060053162
#> time -0.01931850 0.14510779 -0.10953185 0.059816017
#> creatinine sodium time
#> age 0.199034598 -0.04266646 -0.01931850
#> cpk_enzyme -0.033875376 -0.02648112 0.14510779
#> ejection_fraction -0.073216433 0.06755437 -0.10953185
#> platelets 0.008958006 -0.06005316 0.05981602
#> creatinine 1.000000000 -0.18333309 -0.09706253
#> sodium -0.183333092 1.00000000 -0.01004819
#> time -0.097062527 -0.01004819 1.00000000
#>
#> $`smoking==No, death_event==Yes`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.000000000 -0.244982535 0.20596276 0.06959905
#> cpk_enzyme -0.244982535 1.000000000 0.01867736 0.12289268
#> ejection_fraction 0.205962765 0.018677358 1.00000000 -0.07703386
#> platelets 0.069599052 0.122892681 -0.07703386 1.00000000
#> creatinine -0.001137129 -0.005659517 0.29818897 -0.13448393
#> sodium 0.065087265 0.208502147 0.24330004 0.17132888
#> time -0.153561069 -0.025964177 -0.06353549 -0.10702500
#> creatinine sodium time
#> age -0.001137129 0.065087265 -0.153561069
#> cpk_enzyme -0.005659517 0.208502147 -0.025964177
#> ejection_fraction 0.298188973 0.243300041 -0.063535489
#> platelets -0.134483934 0.171328884 -0.107025002
#> creatinine 1.000000000 -0.031542959 0.103079105
#> sodium -0.031542959 1.000000000 0.001802173
#> time 0.103079105 0.001802173 1.000000000
#>
#> $`smoking==Yes, death_event==No`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.03994141 0.101390455 -0.201956742
#> cpk_enzyme -0.03994141 1.00000000 -0.068824376 -0.086150354
#> ejection_fraction 0.10139046 -0.06882438 1.000000000 0.003696456
#> platelets -0.20195674 -0.08615035 0.003696456 1.000000000
#> creatinine -0.01668342 -0.06171731 -0.201080553 -0.114687903
#> sodium 0.02193357 0.02006408 0.172238748 0.089680973
#> time -0.18212683 -0.10580850 -0.156272438 -0.066088921
#> creatinine sodium time
#> age -0.01668342 0.02193357 -0.18212683
#> cpk_enzyme -0.06171731 0.02006408 -0.10580850
#> ejection_fraction -0.20108055 0.17223875 -0.15627244
#> platelets -0.11468790 0.08968097 -0.06608892
#> creatinine 1.00000000 -0.29004414 0.24613787
#> sodium -0.29004414 1.00000000 -0.09327963
#> time 0.24613787 -0.09327963 1.00000000
#>
#> $`smoking==Yes, death_event==Yes`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 0.200418337 0.32401712 0.024513961
#> cpk_enzyme 0.20041834 1.000000000 -0.00666207 0.007141623
#> ejection_fraction 0.32401712 -0.006662070 1.00000000 0.295602007
#> platelets 0.02451396 0.007141623 0.29560201 1.000000000
#> creatinine 0.25516009 -0.124380908 0.06565138 0.178705189
#> sodium -0.08011497 -0.116931357 -0.09548869 0.076094397
#> time -0.26232712 0.158942407 -0.35102116 -0.015134920
#> creatinine sodium time
#> age 0.25516009 -0.08011497 -0.26232712
#> cpk_enzyme -0.12438091 -0.11693136 0.15894241
#> ejection_fraction 0.06565138 -0.09548869 -0.35102116
#> platelets 0.17870519 0.07609440 -0.01513492
#> creatinine 1.00000000 -0.28572157 -0.20057143
#> sodium -0.28572157 1.00000000 0.08794994
#> time -0.20057143 0.08794994 1.00000000
#>
# The returned value is a named list of matrices, one element per group
mat
#> $`smoking==No, death_event==No`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.03932533 0.07494209 -0.057933219
#> cpk_enzyme -0.03932533 1.00000000 -0.08191008 0.060955621
#> ejection_fraction 0.07494209 -0.08191008 1.00000000 0.128436628
#> platelets -0.05793322 0.06095562 0.12843663 1.000000000
#> creatinine 0.19903460 -0.03387538 -0.07321643 0.008958006
#> sodium -0.04266646 -0.02648112 0.06755437 -0.060053162
#> time -0.01931850 0.14510779 -0.10953185 0.059816017
#> creatinine sodium time
#> age 0.199034598 -0.04266646 -0.01931850
#> cpk_enzyme -0.033875376 -0.02648112 0.14510779
#> ejection_fraction -0.073216433 0.06755437 -0.10953185
#> platelets 0.008958006 -0.06005316 0.05981602
#> creatinine 1.000000000 -0.18333309 -0.09706253
#> sodium -0.183333092 1.00000000 -0.01004819
#> time -0.097062527 -0.01004819 1.00000000
#>
#> $`smoking==No, death_event==Yes`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.000000000 -0.244982535 0.20596276 0.06959905
#> cpk_enzyme -0.244982535 1.000000000 0.01867736 0.12289268
#> ejection_fraction 0.205962765 0.018677358 1.00000000 -0.07703386
#> platelets 0.069599052 0.122892681 -0.07703386 1.00000000
#> creatinine -0.001137129 -0.005659517 0.29818897 -0.13448393
#> sodium 0.065087265 0.208502147 0.24330004 0.17132888
#> time -0.153561069 -0.025964177 -0.06353549 -0.10702500
#> creatinine sodium time
#> age -0.001137129 0.065087265 -0.153561069
#> cpk_enzyme -0.005659517 0.208502147 -0.025964177
#> ejection_fraction 0.298188973 0.243300041 -0.063535489
#> platelets -0.134483934 0.171328884 -0.107025002
#> creatinine 1.000000000 -0.031542959 0.103079105
#> sodium -0.031542959 1.000000000 0.001802173
#> time 0.103079105 0.001802173 1.000000000
#>
#> $`smoking==Yes, death_event==No`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.03994141 0.101390455 -0.201956742
#> cpk_enzyme -0.03994141 1.00000000 -0.068824376 -0.086150354
#> ejection_fraction 0.10139046 -0.06882438 1.000000000 0.003696456
#> platelets -0.20195674 -0.08615035 0.003696456 1.000000000
#> creatinine -0.01668342 -0.06171731 -0.201080553 -0.114687903
#> sodium 0.02193357 0.02006408 0.172238748 0.089680973
#> time -0.18212683 -0.10580850 -0.156272438 -0.066088921
#> creatinine sodium time
#> age -0.01668342 0.02193357 -0.18212683
#> cpk_enzyme -0.06171731 0.02006408 -0.10580850
#> ejection_fraction -0.20108055 0.17223875 -0.15627244
#> platelets -0.11468790 0.08968097 -0.06608892
#> creatinine 1.00000000 -0.29004414 0.24613787
#> sodium -0.29004414 1.00000000 -0.09327963
#> time 0.24613787 -0.09327963 1.00000000
#>
#> $`smoking==Yes, death_event==Yes`
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 0.200418337 0.32401712 0.024513961
#> cpk_enzyme 0.20041834 1.000000000 -0.00666207 0.007141623
#> ejection_fraction 0.32401712 -0.006662070 1.00000000 0.295602007
#> platelets 0.02451396 0.007141623 0.29560201 1.000000000
#> creatinine 0.25516009 -0.124380908 0.06565138 0.178705189
#> sodium -0.08011497 -0.116931357 -0.09548869 0.076094397
#> time -0.26232712 0.158942407 -0.35102116 -0.015134920
#> creatinine sodium time
#> age 0.25516009 -0.08011497 -0.26232712
#> cpk_enzyme -0.12438091 -0.11693136 0.15894241
#> ejection_fraction 0.06565138 -0.09548869 -0.35102116
#> platelets 0.17870519 0.07609440 -0.01513492
#> creatinine 1.00000000 -0.28572157 -0.20057143
#> sodium -0.28572157 1.00000000 0.08794994
#> time -0.20057143 0.08794994 1.00000000
#>
# Within each group, correlate creatinine with the other numerical
# variables and keep only the pairs whose absolute coefficient is at
# least 0.2
corr_tab <- heartfailure %>%
  group_by(smoking, death_event) %>%
  correlate(creatinine) %>%
  filter(abs(coef_corr) >= 0.2)

corr_tab
#> # A tibble: 7 × 5
#> smoking death_event var1 var2 coef_corr
#> <fct> <fct> <fct> <fct> <dbl>
#> 1 No Yes creatinine ejection_fraction 0.298
#> 2 Yes No creatinine ejection_fraction -0.201
#> 3 Yes No creatinine sodium -0.290
#> 4 Yes No creatinine time 0.246
#> 5 Yes Yes creatinine age 0.255
#> 6 Yes Yes creatinine sodium -0.286
#> 7 Yes Yes creatinine time -0.201
# Summarise the filtered "correlate" object: each group's matrix holds only
# the pairs that survived the filter, and a group with no remaining rows
# is absent from the list
mat <- summary(corr_tab)
#> * correlation type : group
#> * variable type : numeric
#> * correlation method : pearson
#> * grouped variable : smoking death_event
#>
#> * Matrix of Correlation
#> $`smoking==No, death_event==Yes`
#> ejection_fraction
#> creatinine 0.298189
#>
#> $`smoking==Yes, death_event==No`
#> ejection_fraction sodium time
#> creatinine -0.2010806 -0.2900441 0.2461379
#>
#> $`smoking==Yes, death_event==Yes`
#> age sodium time
#> creatinine 0.2551601 -0.2857216 -0.2005714
#>
# The returned value is the list of per-group matrices
mat
#> $`smoking==No, death_event==Yes`
#> ejection_fraction
#> creatinine 0.298189
#>
#> $`smoking==Yes, death_event==No`
#> ejection_fraction sodium time
#> creatinine -0.2010806 -0.2900441 0.2461379
#>
#> $`smoking==Yes, death_event==Yes`
#> age sodium time
#> creatinine 0.2551601 -0.2857216 -0.2005714
#>
# Connect to a DBMS: an in-memory SQLite database
con_sqlite <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")

# Copy heartfailure to the DBMS as a table named TB_HEARTFAILURE
copy_to(con_sqlite, heartfailure, name = "TB_HEARTFAILURE", overwrite = TRUE)

# Using pipes ---------------------------------
# Correlation coefficients of all numerical variables of the DBMS table
corr_tab <- con_sqlite %>%
  tbl("TB_HEARTFAILURE") %>%
  correlate()

# Summarise the "correlate" object computed from the DBMS table
mat <- summary(corr_tab)
#> * correlation type : generic
#> * variable type : numeric
#> * correlation method : pearson
#>
#> * Matrix of Correlation
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.081406394 0.06019547 -0.05247529
#> cpk_enzyme -0.08140639 1.000000000 -0.04407955 0.02446339
#> ejection_fraction 0.06019547 -0.044079554 1.00000000 0.07217747
#> platelets -0.05247529 0.024463389 0.07217747 1.00000000
#> creatinine 0.15923697 -0.016408480 -0.01130247 -0.04119808
#> sodium -0.04591178 0.059550156 0.17590228 0.06212462
#> time -0.22426485 -0.009345653 0.04172924 0.01051391
#> creatinine sodium time
#> age 0.15923697 -0.04591178 -0.224264849
#> cpk_enzyme -0.01640848 0.05955016 -0.009345653
#> ejection_fraction -0.01130247 0.17590228 0.041729235
#> platelets -0.04119808 0.06212462 0.010513909
#> creatinine 1.00000000 -0.18909521 -0.149315418
#> sodium -0.18909521 1.00000000 0.087640000
#> time -0.14931542 0.08764000 1.000000000
# The returned value is the correlation matrix itself
mat
#> age cpk_enzyme ejection_fraction platelets
#> age 1.00000000 -0.081406394 0.06019547 -0.05247529
#> cpk_enzyme -0.08140639 1.000000000 -0.04407955 0.02446339
#> ejection_fraction 0.06019547 -0.044079554 1.00000000 0.07217747
#> platelets -0.05247529 0.024463389 0.07217747 1.00000000
#> creatinine 0.15923697 -0.016408480 -0.01130247 -0.04119808
#> sodium -0.04591178 0.059550156 0.17590228 0.06212462
#> time -0.22426485 -0.009345653 0.04172924 0.01051391
#> creatinine sodium time
#> age 0.15923697 -0.04591178 -0.224264849
#> cpk_enzyme -0.01640848 0.05955016 -0.009345653
#> ejection_fraction -0.01130247 0.17590228 0.041729235
#> platelets -0.04119808 0.06212462 0.010513909
#> creatinine 1.00000000 -0.18909521 -0.149315418
#> sodium -0.18909521 1.00000000 0.087640000
#> time -0.14931542 0.08764000 1.000000000
# Disconnect from the DBMS: close the SQLite connection opened for this
# example
DBI::dbDisconnect(con_sqlite)
# }