Follows on from ‘Scores from matching things’
### this is just the code that creates the "copy to clipboard" function in the code blocks
htmltools::tagList(
xaringanExtra::use_clipboard(
button_text = "<i class=\"fa fa-clone fa-2x\" style=\"color: #301e64\"></i>",
success_text = "<i class=\"fa fa-check fa-2x\" style=\"color: #90BE6D\"></i>",
error_text = "<i class=\"fa fa-times fa-2x\" style=\"color: #F94144\"></i>"
),
rmarkdown::html_dependency_font_awesome()
)
as_tibble(list(x = 1,
y = 1)) -> tibDat
ggplot(data = tibDat,
aes(x = x, y = y)) +
geom_text(label = "Derangements #2",
size = 20,
colour = "red",
angle = 30,
lineheight = 1) +
xlab("") +
ylab("") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
Updated to add contact me 11.ix.22
In my last post here, Scores from matching things I gave the background to the probabilities of achieving certain scores on matching tasks by chance alone to help explain the perhaps counter-intuitive finding that matching four or more things correctly is unlikely by chance alone at p < .05 regardless of the number of objects to be matched.
This just adds a bit more to that, mostly as plots and complements both that Rblog post and an “ordinary” blog post, Sometimes n=4 is enough.
What I wanted was to show how rapidly the probabilities of achieving any particular score stabilise to an asymptotic value as n increases. Here we are for n from 4 to 15 and scores from 4 to 10.
### create some functions (as in previous post)
all.derangements <- function(n){
cumprob <- prob <- number <- term <- score <- rev(0:n)
for (m in 1:n) {
i <- m+1
s <- n-m
term[i] <- ((-1)^(m))/(factorial(m))
}
term[1] <- 1
for (i in 0:n) {
s <- i+1
prob[s] <- (sum(term[1:s]))/factorial(n-i)
}
number <- factorial(n)*prob
for (s in 0:n) {
m <- n-s
i <- m+1
cumprob[i] <- sum(prob[1:i])
}
tmp <- cbind(n, score,number,prob,cumprob)
tmp
}
p.derange.score <- function(score,n){
if (score > n) stop("Score cannot be greater than n")
if (score == (n-1)) stop ("Score cannot be n-1")
cumprob <- prob <- term <- rev(0:n)
for (m in 1:n) {
i <- m+1
s <- n-m
term[i] <- ((-1)^(m))/(factorial(m))
}
term[1] <- 1
for (i in 0:n) {
s <- i+1
prob[s] <- (sum(term[1:s]))/factorial(n-i)
}
for (s in 0:n) {
m <- n-s
i <- m+1
cumprob[i] <- sum(prob[1:i])
}
cumprob[n+1-score]
}
### now let's go a bit further
### get all the possible scores for n from 4 to 30
lapply(4:30, FUN = all.derangements) -> tmpList
### I always forget this nice little bit of base R and I'm a bit surprised that there doesn't seem to be a nice tidyverse alternative
do.call(rbind.data.frame, tmpList) %>%
as_tibble() -> tmpTib
### this was just to produce some tables for my blog post at https://www.psyctc.org/psyctc/2022/07/23/sometimes-n4-is-enough/
# tmpTib %>%
# write_csv(file = "derangements.csv")
### ditto
# 1:14 %>%
# as_tibble() %>%
# rename(n = value) %>%
# mutate(PossibleWays = factorial(n),
# PossibleWays = prettyNum(PossibleWays, big.mark = ",")) %>%
# write_csv(file = "numbers.csv")
### but Vectorizing the function seemed cleaner so ...
Vectorize(FUN = all.derangements) -> All.derangements
All.derangements(4:14) -> tmpList
### back to the do.call() just for the tables
# do.call(rbind, tmpList) %>%
# as_tibble() %>%
# filter(score == 4) %>%
# select(n, number) %>%
# mutate(number = prettyNum(number, big.mark = ",")) %>%
# write_csv("correct.csv")
# do.call(rbind, tmpList) %>%
# as_tibble() %>%
# filter(score == 4) %>%
# mutate(totalPerms = factorial(n)) %>%
# select(-prob) %>%
# select(n, totalPerms, everything()) %>%
# write_csv("final.csv")
### OK, now for this Rblog post!
All.derangements(4:15) -> tmpList
do.call(rbind, tmpList) %>%
as_tibble() %>%
filter(score > 3 & score < 11) %>%
mutate(score = ordered(score,
levels = 4:10)) -> tmpTib
ggplot(data = tmpTib,
aes(x = n, y = cumprob, colour = score, group = score)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = 3:15,
minor_breaks = 3:15,
limits = c(3, 15)) +
ylab("p") +
theme_bw()
Here’s the same on a log10 y axis to separate the p values for the higher scores.
ggplot(data = tmpTib,
aes(x = n, y = cumprob, colour = score, group = score)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = 3:15,
minor_breaks = 3:15,
limits = c(3, 15)) +
scale_y_continuous(trans = "log10") +
ylab("p") +
theme_bw()
This next table shows how rapidly p values of real interest stabilise. The table is ordered by number of objects (n) within score. The column p is the probability of getting that score or better by chance alone, diffProb is the absolute change in that p value from the one for the previous n, diffPerc is the difference as a percentage of the previous p value. diffProbLT001 flags when the change in absolute p vaue is below .001 at which point I think in my realm any further precision is spurious. However, diffLT1pct flags when the change in p value is below 1% of the previous p value just in case someone wants that sort precise convergence.
All.derangements(4:20) -> tmpList
do.call(rbind, tmpList) %>%
as_tibble() %>%
filter(score > 3 & score < 11) -> tmpTib
### just working out how stable the p values get how soon
tmpTib %>%
arrange(score) %>%
group_by(score) %>%
mutate(diffProb = abs(cumprob - lag(cumprob)),
diffProbLT001 = if_else(diffProb < .001, "Y", "N"),
diffPerc = 100 * diffProb /lag(cumprob),
diffLT1pct = if_else(diffPerc < 1, "Y", "N")) %>%
ungroup() %>%
select(-c(number, prob)) %>%
rename(p = cumprob) %>%
select(score, everything()) %>%
as_hux() %>%
set_position("left") %>% # left align the whole table
set_bold(row = everywhere, col = everywhere) %>% # everything into bold
set_align(everywhere, everywhere, "center") %>% # everything centred
set_align(everywhere, 1:2, "right") %>% # but now right justify the first two columns
map_text_color(by_values("Y" = "green")) %>% # colour matches by text recognition
map_text_color(by_values("N" = "red"))
score | n | p | diffProb | diffProbLT001 | diffPerc | diffLT1pct |
---|---|---|---|---|---|---|
4 | 4 | 0.0417 | ||||
4 | 5 | 0.00833 | 0.0333 | N | 80 | N |
4 | 6 | 0.0222 | 0.0139 | N | 167 | N |
4 | 7 | 0.0183 | 0.00397 | N | 17.9 | N |
4 | 8 | 0.0191 | 0.000868 | Y | 4.76 | N |
4 | 9 | 0.019 | 0.000154 | Y | 0.807 | Y |
4 | 10 | 0.019 | 2.31e-05 | Y | 0.122 | Y |
4 | 11 | 0.019 | 3.01e-06 | Y | 0.0158 | Y |
4 | 12 | 0.019 | 3.44e-07 | Y | 0.00181 | Y |
4 | 13 | 0.019 | 3.53e-08 | Y | 0.000186 | Y |
4 | 14 | 0.019 | 3.28e-09 | Y | 1.73e-05 | Y |
4 | 15 | 0.019 | 2.78e-10 | Y | 1.47e-06 | Y |
4 | 16 | 0.019 | 2.17e-11 | Y | 1.15e-07 | Y |
4 | 17 | 0.019 | 1.57e-12 | Y | 8.29e-09 | Y |
4 | 18 | 0.019 | 1.06e-13 | Y | 5.59e-10 | Y |
4 | 19 | 0.019 | 6.71e-15 | Y | 3.53e-11 | Y |
4 | 20 | 0.019 | 3.99e-16 | Y | 2.1e-12 | Y |
5 | 5 | 0.00833 | ||||
5 | 6 | 0.00139 | 0.00694 | N | 83.3 | N |
5 | 7 | 0.00437 | 0.00298 | N | 214 | N |
5 | 8 | 0.0035 | 0.000868 | Y | 19.9 | N |
5 | 9 | 0.00369 | 0.000193 | Y | 5.52 | N |
5 | 10 | 0.00366 | 3.47e-05 | Y | 0.941 | Y |
5 | 11 | 0.00366 | 5.26e-06 | Y | 0.144 | Y |
5 | 12 | 0.00366 | 6.89e-07 | Y | 0.0188 | Y |
5 | 13 | 0.00366 | 7.95e-08 | Y | 0.00217 | Y |
5 | 14 | 0.00366 | 8.2e-09 | Y | 0.000224 | Y |
5 | 15 | 0.00366 | 7.65e-10 | Y | 2.09e-05 | Y |
5 | 16 | 0.00366 | 6.52e-11 | Y | 1.78e-06 | Y |
5 | 17 | 0.00366 | 5.12e-12 | Y | 1.4e-07 | Y |
5 | 18 | 0.00366 | 3.72e-13 | Y | 1.02e-08 | Y |
5 | 19 | 0.00366 | 2.52e-14 | Y | 6.87e-10 | Y |
5 | 20 | 0.00366 | 1.59e-15 | Y | 4.35e-11 | Y |
6 | 6 | 0.00139 | ||||
6 | 7 | 0.000198 | 0.00119 | N | 85.7 | N |
6 | 8 | 0.000719 | 0.000521 | Y | 262 | N |
6 | 9 | 0.000565 | 0.000154 | Y | 21.5 | N |
6 | 10 | 0.0006 | 3.47e-05 | Y | 6.15 | N |
6 | 11 | 0.000593 | 6.31e-06 | Y | 1.05 | N |
6 | 12 | 0.000594 | 9.65e-07 | Y | 0.163 | Y |
6 | 13 | 0.000594 | 1.27e-07 | Y | 0.0214 | Y |
6 | 14 | 0.000594 | 1.48e-08 | Y | 0.00248 | Y |
6 | 15 | 0.000594 | 1.53e-09 | Y | 0.000258 | Y |
6 | 16 | 0.000594 | 1.44e-10 | Y | 2.42e-05 | Y |
6 | 17 | 0.000594 | 1.23e-11 | Y | 2.07e-06 | Y |
6 | 18 | 0.000594 | 9.67e-13 | Y | 1.63e-07 | Y |
6 | 19 | 0.000594 | 7.04e-14 | Y | 1.19e-08 | Y |
6 | 20 | 0.000594 | 4.78e-15 | Y | 8.04e-10 | Y |
7 | 7 | 0.000198 | ||||
7 | 8 | 2.48e-05 | 0.000174 | Y | 87.5 | N |
7 | 9 | 0.000102 | 7.72e-05 | Y | 311 | N |
7 | 10 | 7.88e-05 | 2.31e-05 | Y | 22.7 | N |
7 | 11 | 8.41e-05 | 5.26e-06 | Y | 6.68 | N |
7 | 12 | 8.31e-05 | 9.65e-07 | Y | 1.15 | N |
7 | 13 | 8.33e-05 | 1.48e-07 | Y | 0.179 | Y |
7 | 14 | 8.32e-05 | 1.97e-08 | Y | 0.0236 | Y |
7 | 15 | 8.32e-05 | 2.3e-09 | Y | 0.00276 | Y |
7 | 16 | 8.32e-05 | 2.39e-10 | Y | 0.000287 | Y |
7 | 17 | 8.32e-05 | 2.25e-11 | Y | 2.7e-05 | Y |
7 | 18 | 8.32e-05 | 1.93e-12 | Y | 2.32e-06 | Y |
7 | 19 | 8.32e-05 | 1.53e-13 | Y | 1.83e-07 | Y |
7 | 20 | 8.32e-05 | 1.12e-14 | Y | 1.34e-08 | Y |
8 | 8 | 2.48e-05 | ||||
8 | 9 | 2.76e-06 | 2.2e-05 | Y | 88.9 | N |
8 | 10 | 1.27e-05 | 9.92e-06 | Y | 360 | N |
8 | 11 | 9.67e-06 | 3.01e-06 | Y | 23.7 | N |
8 | 12 | 1.04e-05 | 6.89e-07 | Y | 7.12 | N |
8 | 13 | 1.02e-05 | 1.27e-07 | Y | 1.23 | N |
8 | 14 | 1.03e-05 | 1.97e-08 | Y | 0.192 | Y |
8 | 15 | 1.02e-05 | 2.62e-09 | Y | 0.0256 | Y |
8 | 16 | 1.02e-05 | 3.08e-10 | Y | 0.003 | Y |
8 | 17 | 1.02e-05 | 3.22e-11 | Y | 0.000314 | Y |
8 | 18 | 1.02e-05 | 3.04e-12 | Y | 2.96e-05 | Y |
8 | 19 | 1.02e-05 | 2.62e-13 | Y | 2.55e-06 | Y |
8 | 20 | 1.02e-05 | 2.07e-14 | Y | 2.02e-07 | Y |
9 | 9 | 2.76e-06 | ||||
9 | 10 | 2.76e-07 | 2.48e-06 | Y | 90 | N |
9 | 11 | 1.4e-06 | 1.13e-06 | Y | 409 | N |
9 | 12 | 1.06e-06 | 3.44e-07 | Y | 24.6 | N |
9 | 13 | 1.14e-06 | 7.95e-08 | Y | 7.51 | N |
9 | 14 | 1.12e-06 | 1.48e-08 | Y | 1.3 | N |
9 | 15 | 1.13e-06 | 2.3e-09 | Y | 0.204 | Y |
9 | 16 | 1.13e-06 | 3.08e-10 | Y | 0.0273 | Y |
9 | 17 | 1.13e-06 | 3.62e-11 | Y | 0.00322 | Y |
9 | 18 | 1.13e-06 | 3.8e-12 | Y | 0.000337 | Y |
9 | 19 | 1.13e-06 | 3.6e-13 | Y | 3.2e-05 | Y |
9 | 20 | 1.13e-06 | 3.11e-14 | Y | 2.76e-06 | Y |
10 | 10 | 2.76e-07 | ||||
10 | 11 | 2.51e-08 | 2.51e-07 | Y | 90.9 | N |
10 | 12 | 1.4e-07 | 1.15e-07 | Y | 458 | N |
10 | 13 | 1.05e-07 | 3.53e-08 | Y | 25.3 | N |
10 | 14 | 1.13e-07 | 8.2e-09 | Y | 7.85 | N |
10 | 15 | 1.11e-07 | 1.53e-09 | Y | 1.36 | N |
10 | 16 | 1.11e-07 | 2.39e-10 | Y | 0.215 | Y |
10 | 17 | 1.11e-07 | 3.22e-11 | Y | 0.0289 | Y |
10 | 18 | 1.11e-07 | 3.8e-12 | Y | 0.00341 | Y |
10 | 19 | 1.11e-07 | 4e-13 | Y | 0.000359 | Y |
10 | 20 | 1.11e-07 | 3.8e-14 | Y | 3.41e-05 | Y |
Do contact me if this interests you and if you might want to use the method with real data.
16/04/2024 at 18:34
Text and figures are licensed under Creative Commons Attribution CC BY-SA 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Evans (2022, July 23). Chris (Evans) R SAFAQ: Derangements #2. Retrieved from https://www.psyctc.org/R_blog/posts/2022-07-23-derangements-2/
BibTeX citation
@misc{evans2022derangements, author = {Evans, Chris}, title = {Chris (Evans) R SAFAQ: Derangements #2}, url = {https://www.psyctc.org/R_blog/posts/2022-07-23-derangements-2/}, year = {2022} }