---
title: "The Sampling Distribution of the Sample Mean"
output: html_document
---

```{r}
# Draw several independent samples (with replacement) from a small, skewed
# population and plot each sample mean with +/- one estimated standard error
# against the true population mean (red line).
population <- c(rep(1, 5), rep(2, 7), rep(9, 4))
sample_size <- 30
n_samples <- 5

# One column per sample, `sample_size` rows each.
samples <- replicate(
  n_samples,
  sample(population, sample_size, replace = TRUE)
)

means <- colMeans(samples)
# Estimated standard error of the mean: sample sd over sqrt(sample size).
# The divisor must be the actual sample size, so it is derived from
# `sample_size` rather than hard-coded.
std_errors <- apply(samples, 2, sd) / sqrt(sample_size)

positions <- seq_len(n_samples)
plot(means, ylim = c(0, 10), xaxt = "n", xlab = "", ylab = "")
axis(1, at = positions, labels = paste0("Mean-", positions))
arrows(
  positions, means - std_errors,
  positions, means + std_errors,
  code = 3, length = 0.02, angle = 90
)
abline(h = mean(population), col = "red")
```

```{r}
# Standard deviation of a complete population (divides by n).
# `sd()` divides by n - 1, which is right for a sample but overstates the
# spread when `x` is the entire population being sampled from.
population_sd <- function(x) {
  sqrt(mean((x - mean(x))^2))
}

# Simulate the sampling distribution of the mean for samples of size `n`
# drawn with replacement from `population`, draw its histogram, overlay the
# normal approximation (red) and mark the population mean (blue).
#
# population: numeric vector holding the whole population.
# n:          size of each sample.
# n_reps:     number of simulated samples.
# Returns the simulated sample means, invisibly.
plot_sampling_distribution <- function(population, n, n_reps = 10000) {
  sample_means <- replicate(
    n_reps,
    mean(sample(population, n, replace = TRUE))
  )

  h <- hist(
    sample_means,
    breaks = 10, density = 10, col = "lightgray",
    xlab = "Mean", main = paste("N =", n)
  )

  xfit <- seq(min(sample_means), max(sample_means), length.out = 40)
  yfit <- dnorm(
    xfit,
    mean = mean(population),
    sd = population_sd(population) / sqrt(n)
  )
  # Rescale the density to the histogram's count axis: bin width times the
  # number of observations. `breaks` always has at least two entries,
  # whereas `mids` has only one when the histogram has a single bin.
  bin_width <- diff(h$breaks[1:2])
  lines(xfit, yfit * bin_width * length(sample_means), col = "red", lwd = 2)
  abline(v = mean(population), col = "blue")

  invisible(sample_means)
}

# 2 x 2 grid: the population itself, then one panel per sample size.
par(mfrow = c(2, 2))
population <- c(rep(1, 3), rep(2, 2), rep(9, 1))
plot(
  table(population),
  xlab = "Value", ylab = "Frequency", main = "Population"
)
abline(v = mean(population), col = "blue")

for (n in c(2, 10, 50)) {
  plot_sampling_distribution(population, n)
}
```