Description: This script compare two ways of generating confidence interval for median, one using non-parametric bootstrp with 1000 bootstrap samples, one by robust estimator. The two different confidence interval is used in Morte Carlo simulation for converage probablity and width, for different distributions and sample sizes.
bootmedian <- function(x, rep = 1000) {
n = length(x) # sample size for each bootstrap sample
bootsample = sample(x, n*rep, replace = TRUE) # generate bootstrap sample by sampling with replacement
dim(bootsample) = c(rep, n) # each row is one bootstrap sample, there are 1000 rows
boot.median = apply(bootsample, 1, median) # compute median for each bootstrap sample
lowerci = quantile(boot.median, 0.025) # 95% upper confidence bound
upperci = quantile(boot.median, 0.975) # 95% lower confidence bound
return(c(lowerci, upperci)) # return upper and lower confidence bound as a vector
}
bootmedianci <- function(xmat, target) {
medianci = apply(xmat, 1, bootmedian) # get bootstrap 95% confidence interval for median
cover_p = mean((medianci[1,] <= target) & (medianci[2,] >= target)) # coverage probability
width = mean(medianci[2, ] - mean(medianci[1, ])) # average width of 1000 bootstrap confidence interval
return(c(cover_p, width)) # return coverage probability and width of confidence interval
}
robustmedianci <- function(xmat, target) {
n = dim(xmat)[2] # sample size for each observation
samplemedian = apply(xmat, 1, median) # median for each sample
absdeviation = abs(xmat - samplemedian) # absolute deviation from median
median_absdev = apply(absdeviation, 1, median) # median absolute deviation
m = qnorm(1-(1-0.95)/2) # 2.5% quantile of standard normal distribution
lowerci = samplemedian - m*1.49*median_absdev/sqrt(n) # lower bound of 95% confidence interval
upperci = samplemedian + m*1.49*median_absdev/sqrt(n) # upper bound of 95% confidence interval
cover_p = mean((lowerci < target) & (upperci > target)) # coverage probability
width = mean(upperci-lowerci) # average width of 95% confidence interval for median
return(c(cover_p, width))
}
The above 3 functions are used to generate bootstrap and robust confidence interval for median.
n = 200 # number of samples
samplesize = c(10, 25, 50, 100) #sample size
library(ggplot2)
# normal (0,5)
normalprob = matrix(0, 4, 2) # matrix to save coverage probability for normal
normalwidth = matrix(0, 4, 2) # matrix to save CI width for normal
for (i in 1:4) {
k = samplesize[i] # sample size
x = rnorm(n*k, 0, 3) # sample from normal distribution of mean 0 and sd 5
dim(x) = c(n, samplesize[i]) # get sample matrix in correct dimension
normalprob[i,1] = bootmedianci(x, 0)[1]
normalwidth[i,1] = bootmedianci(x, 0)[2]
normalprob[i,2] = robustmedianci(x, 0)[1]
normalwidth[i,2] = robustmedianci(x, 0)[2]
}
normalprob <- data.frame(SampleSize = samplesize, Bootstrap = normalprob[,1], Robust = normalprob[,2])
normalwidth <- data.frame(SampleSize = samplesize, Bootstrap = normalwidth[,1], Robust = normalwidth[,2])
# Exponential(0.5)
expprob = matrix(0, 4, 2) # matrix to save coverage probability for normal
expwidth = matrix(0, 4, 2) # matrix to save CI width for normal
for (i in 1:4) {
k = samplesize[i] # sample size
x = rexp(n*k, 0.5) # sample from normal distribution of mean 0 and sd 5
target = qexp(0.5, 0.5)
dim(x) = c(n, samplesize[i]) # get sample matrix in correct dimension
expprob[i,1] = bootmedianci(x, target)[1]
expwidth[i,1] = bootmedianci(x, target)[2]
expprob[i,2] = robustmedianci(x, target)[1]
expwidth[i,2] = robustmedianci(x, target)[2]
}
expprob <- data.frame(SampleSize = samplesize, Bootstrap = expprob[,1], Robust = expprob[,2])
expwidth <- data.frame(SampleSize = samplesize, Bootstrap = expwidth[,1], Robust = expwidth[,2])
# uniform (-5,5)
unifprob <- matrix(0, 4, 2) # matrix to save coverage probability for normal
unifwidth <- matrix(0, 4, 2) # matrix to save CI width for normal
for (i in 1:4) {
k = samplesize[i] # sample size
x = runif(n*k, -5, 5) # sample from normal distribution of mean 0 and sd 5
target = qunif(0.5, -5, 5)
dim(x) = c(n, samplesize[i]) # get sample matrix in correct dimension
unifprob[i,1] = bootmedianci(x, target)[1]
unifwidth[i,1] = bootmedianci(x, target)[2]
unifprob[i,2] = robustmedianci(x, target)[1]
unifwidth[i,2] = robustmedianci(x, target)[2]
}
unifprob <- data.frame(SampleSize = samplesize, Bootstrap = unifprob[,1], Robust = unifprob[,2])
unifwidth <- data.frame(SampleSize = samplesize, Bootstrap = unifwidth[,1], Robust = unifwidth[,2])
# Poisson(2)
poisprob <- matrix(0, 4, 2) # matrix to save coverage probability for normal
poiswidth <- matrix(0, 4, 2) # matrix to save CI width for normal
for (i in 1:4) {
k = samplesize[i] # sample size
x = rpois(n*k, 2) # sample from normal distribution of mean 0 and sd 5
target = qpois(0.5, 2)
dim(x) = c(n, samplesize[i]) # get sample matrix in correct dimension
poisprob[i,1] = bootmedianci(x, target)[1]
poiswidth[i,1] = bootmedianci(x, target)[2]
poisprob[i,2] = robustmedianci(x, target)[1]
poiswidth[i,2] = robustmedianci(x, target)[2]
}
poisprob <- data.frame(SampleSize = samplesize, Bootstrap = poisprob[,1], Robust = poisprob[,2])
poiswidth <- data.frame(SampleSize = samplesize, Bootstrap = poiswidth[,1], Robust = poiswidth[,2])
The above code generate coverage probability and average width of confidence interval for each distribution and each sample size(10, 25, 50, 100)
require(gridExtra)
## Loading required package: gridExtra
p1 = ggplot(data = normalprob, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Coverage Probability") +
ggtitle("Coverage Probability for Normal Distribution(0, 5)")
p2 = ggplot(data = normalwidth, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Width of 95% Confidence Interval") +
ggtitle("Width of Confidence Interval for Normal Distribution(0, 5)")
grid.arrange(p1, p2, nrow=2)
p3 = ggplot(data = expprob, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Coverage Probability") +
ggtitle("Coverage Probability for Exponential Distribution(0.5)")
p4 = ggplot(data = expwidth, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Waidth of 95% Confidence Interval") +
ggtitle("Width of Confidence Interval for Exponential Distribution(0.5)")
grid.arrange(p3, p4, nrow=2)
p5 = ggplot(data = unifprob, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Coverage Probability") +
ggtitle("Coverage Probability for Uniform Distribution(-5,5)")
p6 = ggplot(data = unifwidth, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Width of 95% Confidence Interval") +
ggtitle("Width of Confidence Interval for Uniform Distribution(-5,5)")
grid.arrange(p5, p6, nrow=2)
p7 = ggplot(data = poisprob, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Coverage Probability") +
ggtitle("Coverage Probability for Poisson Distribution(2)")
p8 = ggplot(data = poiswidth, aes(x = SampleSize, y = value, color = variable)) +
geom_line(aes(y = Bootstrap, col = "Bootstrap"), size = 1.5) +
geom_line(aes(y = Robust, col = "Robust"), size = 1.5) +
xlab("Sample Size") + ylab("Width of 95% Confidence Interval") +
ggtitle("Width of Confidence Interval for Poisson Distribution(2)")
grid.arrange(p7, p8, nrow=2)
Observing coverage probability and width of 95% confidence interval for median, bootstrap confidence interval usually has a higher coverage probability and greater width than robust confidence interval. Width for confidence interval also decreases when sample size increases across distribution and method to generate confidence interval.