#! /usr/bin/R sink(file="/dev/null") # to stop getting the Hmisc announcement text # library(Hmisc) binconf <- function (x, n, alpha = 0.05, method = c("wilson", "exact", "asymptotic", "all"), include.x = FALSE, include.n = FALSE, return.df = FALSE) { method <- match.arg(method) bc <- function(x, n, alpha, method) { nu1 <- 2 * (n - x + 1) nu2 <- 2 * x ll <- if (x > 0) x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1)) else 0 nu1p <- nu2 + 2 nu2p <- nu1 - 2 pp <- if (x < n) qf(1 - alpha/2, nu1p, nu2p) else 1 ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp) zcrit <- -qnorm(alpha/2) z2 <- zcrit * zcrit p <- x/n cl <- (p + z2/2/n + c(-1, 1) * zcrit * sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n) if (x == 1) cl[1] <- -log(1 - alpha)/n if (x == (n - 1)) cl[2] <- 1 + log(1 - alpha)/n asymp.lcl <- x/n - qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n))/n) asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n))/n) res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl)) res <- cbind(rep(x/n, 3), res) switch(method, wilson = res[2, ], exact = res[1, ], asymptotic = res[3, ], all = res, res) } if ((length(x) != length(n)) & length(x) == 1) x <- rep(x, length(n)) if ((length(x) != length(n)) & length(n) == 1) n <- rep(n, length(x)) if ((length(x) > 1 | length(n) > 1) & method == "all") { method <- "wilson" warning("method=all will not work with vectors...setting method to wilson") } if (method == "all" & length(x) == 1 & length(n) == 1) { mat <- bc(x, n, alpha, method) dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"), c("PointEst", "Lower", "Upper")) if (include.n) mat <- cbind(N = n, mat) if (include.x) mat <- cbind(X = x, mat) if (return.df) mat <- as.data.frame(mat) return(mat) } mat <- matrix(ncol = 3, nrow = length(x)) for (i in 1:length(x)) mat[i, ] <- bc(x[i], n[i], alpha = alpha, method = method) dimnames(mat) <- list(rep("", dim(mat)[1]), c("PointEst", "Lower", "Upper")) if (include.n) mat <- cbind(N = n, mat) if (include.x) mat <- cbind(X = x, mat) if (return.df) mat <- as.data.frame(mat) mat } ci.diff.props2 <- function(x1, n1, x2, n2, conf.val = 0.95, round.n = 2) { # Implements Newcombe-Wilson estimate for confidence interval # of a difference in proportions zalpha <- qnorm(0.5 + (conf.val/2)) zsquare <- zalpha^2 p1 <- x1/n1 p2 <- x2/n2 den1 <- 2 * (n1 + zsquare) den2 <- 2 * (n2 + zsquare) L1 <- (2*n1*p1 + zsquare - zalpha*sqrt(zsquare + 4*n1*(p1*(1-p1))))/den1 U1 <- (2*n1*p1 + zsquare + zalpha*sqrt(zsquare + 4*n1*(p1*(1-p1))))/den1 L2 <- (2*n2*p2 + zsquare - zalpha*sqrt(zsquare + 4*n2*(p2*(1-p2))))/den2 U2 <- (2*n2*p2 + zsquare + zalpha*sqrt(zsquare + 4*n2*(p2*(1-p2))))/den2 interval1 <- zalpha * sqrt(((L1*(1-L1))/n1) + ((U2*(1-U2))/n2)) interval2 <- zalpha * sqrt(((U1*(1-U1))/n1) + ((L2*(1-L2))/n2)) diff <- p1 - p2 lwr <- diff - interval1 upr <- diff + interval2 return(c(lwr,upr)) } sink() # switch output back to stdout tag(HTML) tag(HEAD) tag(TITLE) cat("Confidence interval of difference between two proportions") untag(TITLE) untag(HEAD) lf(2) tag(BODY, bgcolor = "lime") lf(2) tag(center) cat("
") cat("Your input") cat(" | ") cat("") cat("||
---|---|---|
")
now <- system('date +%Y-%m-%d,%T', intern=TRUE)
host <- system("echo $REMOTE_ADDR", intern=TRUE)
cat("Request from: ",host, " at", now," ") cat(" | ")
cat("||
What | Yours | Reference | ") cat("
") cat("Numerator: ") cat(" | ") cat("") cat(numerat1) cat(" | ") cat("") cat(numerat2) cat(" | ") cat("
") cat("Denominator: ") cat(" | ") cat("") cat(denom1) cat(" | ") cat("") cat(denom2) cat(" | ") cat("
Proportion | ") cat("",propn2," | ") cat("|
") cat("C.I. wanted: ") cat(" | ") cat("") cat(CI) cat("%") cat(" | ") cat("|
") cat("Decimal places wanted: ") cat(" | ") cat("") cat(round) cat(" | ") cat("|
") cat("Results") cat(" | ") cat("||
")
alpha <- 1 - CI/100
ci <- CI/100
ci.sample <- binconf(numerat1,denom1,alpha=alpha,method="wilson")
tmp1 <- paste(numerat1,"out of",denom1,"is",round(ci.sample[1],round))
cat("For your sample, ",tmp1)
tmp2 <- paste(" The ",CI,"% CI for that is from ",round(ci.sample[2],round)," to ",round(ci.sample[3],round),sep="")
cat(tmp2)
ci.ref <- binconf(ref.numerat,denom2,alpha=alpha,method="wilson")
tmp1 <- paste(ref.numerat,"out of",denom2,"is",round(ci.ref[1],round))
cat(" For reference data, ",tmp1) tmp2 <- paste(" The ",CI,"% CI for that is from ",round(ci.ref[2],round)," to ",round(ci.ref[3],round),sep="") cat(tmp2) ci.diff <- ci.diff.props2(numerat1,denom1,ref.numerat,denom2,ci,round) cat(" | ")
cat("||
") cat("Explanation and advice for CORE system users and others") cat(" | ") cat("||
A ",CI,"% CI will embrace the difference between the proportions in the two populations")
cat(" i.e. the "true" difference in ",CI,"% of times you take two samples to compare.")
cat(" The larger the samples you take, the tighter the CI for the difference you'll see")
cat(" but they'll be "right" ")
cat(CI,"% of the time, i.e. "wrong":not embrace the population proportion ")
wrong <- 100-CI
cat(wrong,"% of the time.")
cat(" All this is based on the assumptions that you are taking samples in which the individual observations") cat(" are independent of one another and are from infinite populations.") cat(" The maths are pretty robust to finite populations, i.e. the real world.") if ((min(ci.diff) < 0) && (max(ci.diff) > 0)) { cat(" Since the CI of that difference includes zero, this is equivalent in significance test terms") cat(" to having found a non-significant difference in proportions at criterion alpha of ") alpha <- 1 - ci cat(alpha) } else { cat(" Since the CI of that difference does not include zero, this is equivalent in significance test terms") cat(" to having found a significant difference in proportions at criterion alpha of ") alpha <- 1 - ci cat(alpha) } cat(" Statistical significance is often overvalued.") cat(" All that statistical significance is telling you is that a difference ") cat("as large or larger than that was likely to happen less than one time in twenty ") cat("if you were taking samples of sizes ") cat(denom1," and ",denom2) cat(" from infinitely large populations in which the proportions are the same.") cat(" The advantage of the CI is that it tells you the precision you'd expect from random") cat(" of samples of size ") cat(denom1," and ",denom2) cat(". What is generally really important in the context of the CI you have here") cat(" is how big the difference between the reference value and your observed value actually is.") cat(" Is that difference clinically or managerially interesting to you or your service? ") cat("If you are using this to compare CORE system parameters you have") cat(" with those from CORE benchmarks or descriptors, always bear in mind differences between") cat(" your or your service's setting and methods that might have contributed to any difference") cat(" you see between your proportion and that in the reference dataset; same if you don't see") cat(" a difference!") cat(" | ")
cat("||
") cat("Technicalities") cat(" | ") cat("||
")
cat("Confidence interval was calculated using the Newcomb-Wilson method, generally considered the best method by statisticians.")
cat(" Calculation done in R, that for the two separate proportions used the function binconf() written by Frank E. Harrell Jr. and contained in his")
cat(" almost vital Hmisc package for R which can be obtained from CRAN.")
lf(2)
cat("CGI script written by Chris Evans using David Firth's excellent GGIwithR package. ")
linkto("Contact me if something isn't working right ...", "http://www.psyctc.org/cgi-bin/mailto.pl?webmaster") ; br()
lf()
cat(" ") cat("Output produced at ", date()) cat(" ") lf() cat(" | ")
cat("