Chris (Evans) R SAFAQ: Subscale/total correlations

Chris Evans

### this is just the code that creates the "copy to clipboard" function in the code blocks
htmltools::tagList(
  xaringanExtra::use_clipboard(
    button_text = "",
    success_text = "",
    error_text = ""
  ),
  rmarkdown::html_dependency_font_awesome()
)

library(tidyverse)

### Plot R = sqrt(k_subscale / k_total), labelled in the plot as the
### asymptotic subscale/total correlation, against the proportion of the
### total items that are in the subscale, with reference lines for some
### real scales.

valK <- 340 # number of steps in the grid of fractions (0, 1/valK, ..., 1)

### grid of fractions and the corresponding R = sqrt(fraction)
tibble(fraction = (0:valK) / valK) %>%
  mutate(R = sqrt(fraction)) -> tibRvals

### Reference scales: the label and the plotted fraction are both derived
### from the same item counts so they cannot disagree.
### NOTE(review): the previous version labelled "Problems or Functioning" as
### 17/34 but plotted 18/34.  With WB = 4 and Risk = 6 of 34 items, two
### equal-sized remaining domains must have (34 - 4 - 6) / 2 = 12 items each,
### so 12/34 is used here -- please confirm against the CORE-OM scoring key.
tibble(scale = c("CORE-OM WB",
                 "CORE-OM Risk",
                 "CORE-OM Problems or Functioning",
                 "AS any subscale"),
       kSub = c(4, 6, 12, 4),      # items in the subscale
       kTot = c(34, 34, 34, 12)) %>% # items in the whole measure
  mutate(fraction = kSub / kTot,
         R = sqrt(fraction),
         scale = str_c(scale, " (", kSub, "/", kTot, ")")) %>%
  select(scale, fraction, R) -> tibCOREandAS

ggplot(data = tibRvals,
       aes(x = fraction, y = R)) +
  geom_point() +
  geom_line() +
  ### horizontal and vertical reference lines from the axes to the curve
  geom_linerange(data = tibCOREandAS,
                 aes(xmin = 0, xmax = fraction, y = R)) +
  geom_linerange(data = tibCOREandAS,
                 aes(x = fraction, ymin = 0, ymax = R)) +
  ### label each reference line just above its horizontal segment
  geom_text(data = tibCOREandAS,
            aes(x = 0, y = R + .015, label = scale),
            hjust = 0,
            size = 2.2) +
  xlab(bquote(k[subscale]/k[total])) +
  ylab("Asymptotic correlation") +
  ggtitle("Plot of asymptotic subscale/total correlation\nagainst proportion of total items in subscale") +
  scale_x_continuous(breaks = (0:10/10)) +
  theme_bw() +
  theme(plot.title = element_text(hjust = .5))

library(tidyverse)
options(dplyr.summarise.inform = FALSE)

### generate Gaussian null model data
set.seed(12345) # set for reproducible results
valN <- 5000 # sample size
valK <- 12 # total number of items

### now make up the data in long format, i.e.
###   an item score
###   an item label
###   a person ID
### Rows are laid out person by person: the first valK rows are person 1's
### items, the next valK rows are person 2's, and so on.  Both index
### calculations use valK (not a hard-coded 12) so that changing valK above
### keeps the item/person mapping correct.
tibble(value = rnorm(valN * valK)) %>% # uncorrelated Gaussian data
  mutate(itemN = ((row_number() - 1) %% valK) + 1, # modulo arithmetic gives the item number
         item = str_c("I", sprintf("%02.0f", itemN)), # format it nicely
         ID = ((row_number() - 1) %/% valK) + 1, # integer division gives the person ID
         ID = sprintf("%03.0f", ID)) %>% # and format that, can now dump itemN
  select(-itemN) -> tibLongItemDat

### reshape long to wide: one row per person (ID), one column per item
tibWideItemDat <- pivot_wider(tibLongItemDat,
                              id_cols = ID,
                              names_from = item,
                              values_from = value)

### item-to-scale maps: items are simply allocated in sequence, four per
### scale (this is not the AS mapping)
vecItemsScale1 <- sprintf("I%02d", 1:4)
vecItemsScale2 <- sprintf("I%02d", 5:8)
vecItemsScale3 <- sprintf("I%02d", 9:12)

### per-person mean scores: the total across all items and one per subscale
### (scoreAll has to be computed first: c_across(-ID) takes every column
### except ID, so it must run before the subscale score columns exist)
tibWideAllDat <- tibWideItemDat %>%
  rowwise() %>%
  mutate(
    scoreAll = mean(c_across(-ID)),
    score1 = mean(c_across(all_of(vecItemsScale1))),
    score2 = mean(c_across(all_of(vecItemsScale2))),
    score3 = mean(c_across(all_of(vecItemsScale3)))
  ) %>%
  ungroup()

### keep just the score columns for the correlation table and pairs plot
tibScores <- select(tibWideAllDat, starts_with("score"))

### corrr::correlate() emits a message about the correlation method and the
### handling of missing values, and that message punches through markdown
### despite the block header having "message=FALSE".
### I could have wrapped this in suppressMessages(); however, you can suppress
### it with "quiet = TRUE", see below.
tibScores %>%
  ### here is the "quiet = TRUE" suppression of the message
  corrr::correlate(diagonal = 1, quiet = TRUE) %>%
  ### round via an explicit function: passing extra arguments through
  ### across(..., round, 2) is deprecated in dplyr >= 1.1.0
  mutate(across(starts_with("score"), function(x) round(x, 2))) %>%
  pander::pander(justify = "lrrrr", digits = 2)

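| term     | scoreAll | score1 | score2 | score3 |
|:---------|---------:|-------:|-------:|-------:|
| scoreAll |        1 |   0.57 |   0.58 |   0.59 |
| score1   |     0.57 |      1 |  -0.01 |   0.01 |
| score2   |     0.58 |  -0.01 |      1 |   0.01 |
| score3   |     0.59 |   0.01 |   0.01 |      1 |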

### panel function for GGally::ggpairs(): a very transparent scatterplot
### with a fitted linear regression line and its band in blue;
### any further arguments are passed on to geom_smooth()
lm_fn <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point(alpha = .05) +
    geom_smooth(method = lm, fill = "blue", color = "blue", ...)
}

### pairs plot of the scores using that panel function in the lower triangle
GGally::ggpairs(tibScores,
                lower = list(continuous = lm_fn)) +
  theme_bw()

@misc{evans2021subscale/total, author = {Evans, Chris}, title = {Chris (Evans) R SAFAQ: Subscale/total correlations}, url = {https://www.psyctc.org/R_blog/posts/2021-10-31-subscaletotal-correlations/}, year = {2021} }

Subscale/total correlations

Author

Affiliation

Published

Citation

Last updated

Footnotes

References

Citation