---
title: "The Sampling Distribution of the Sample Mean"
output: html_document
---

```{r}
# Illustrate sampling variability: draw five independent samples (with
# replacement) from a small discrete population, then plot each sample mean
# with error bars of +/- one standard error around it. The red horizontal
# line marks the true population mean.
population <- c(rep(1, 5), rep(2, 7), rep(9, 4))

# Number of observations per sample. This is also the n in the standard
# error formula sd / sqrt(n), so it is defined once and reused below.
sample_size <- 30

sample_1 <- sample(population, sample_size, replace = TRUE)
sample_2 <- sample(population, sample_size, replace = TRUE)
sample_3 <- sample(population, sample_size, replace = TRUE)
sample_4 <- sample(population, sample_size, replace = TRUE)
sample_5 <- sample(population, sample_size, replace = TRUE)

draws <- list(sample_1, sample_2, sample_3, sample_4, sample_5)

means <- vapply(draws, mean, numeric(1))
# Standard error of the mean: sample sd divided by the square root of the
# actual sample size (previously a hard-coded 10 that did not match the 30
# observations drawn per sample).
std_errors <- vapply(draws, sd, numeric(1)) / sqrt(sample_size)

positions <- seq_along(means)

plot(means, ylim = c(0, 10), xaxt = "n", xlab = "", ylab = "")
axis(1, at = positions, labels = paste0("Mean-", positions))
# code = 3 with angle = 90 draws flat caps at both ends, i.e. error bars.
arrows(positions, means - std_errors, positions, means + std_errors,
       code = 3, length = 0.02, angle = 90)
abline(h = mean(population), col = "red")
```

```{r}
# Show how the sampling distribution of the sample mean changes with the
# sample size N: one panel for the population itself, then one histogram of
# simulated sample means per N, each overlaid with the normal curve
# N(mu, sigma / sqrt(N)) (red) and the population mean (blue).
par(mfrow = c(2, 2))

population <- c(rep(1, 3), rep(2, 2), rep(9, 1))

# Standard deviation of a complete population (divisor n).
# stats::sd() divides by n - 1, which is the right estimator for a sample but
# overstates the spread when `x` is the entire population being resampled.
population_sd <- function(x) {
  sqrt(mean((x - mean(x))^2))
}

# Simulate and plot the sampling distribution of the mean.
#
# population: numeric vector holding every value of the population.
# n:          number of observations drawn (with replacement) per sample.
# n_sims:     number of simulated samples.
#
# Draws the histogram of the simulated means with the theoretical normal
# curve and the population mean, and invisibly returns the simulated means.
plot_sampling_distribution <- function(population, n, n_sims = 10000) {
  # Each column of `draws` is one simulated sample of size n; the matrix is
  # forced explicitly so that n = 1 still yields one column per simulation.
  draws <- matrix(
    replicate(n_sims, sample(population, n, replace = TRUE)),
    nrow = n
  )
  sample_means <- colMeans(draws)

  h <- hist(sample_means, breaks = 10, density = 10,
            col = "lightgray", xlab = "Mean", main = paste("N =", n))

  xfit <- seq(min(sample_means), max(sample_means), length.out = 40)
  yfit <- dnorm(xfit, mean = mean(population),
                sd = population_sd(population) / sqrt(n))
  # Rescale the density to the histogram's count axis: bin width * number of
  # observations. The width comes from the breaks so that a single-bin
  # histogram still has a defined width.
  yfit <- yfit * diff(h$breaks[1:2]) * length(sample_means)

  lines(xfit, yfit, col = "red", lwd = 2)
  abline(v = mean(population), col = "blue")

  invisible(sample_means)
}

plot(table(population), xlab = "Value", ylab = "Frequency", main = "Population")
abline(v = mean(population), col = "blue")

# Same order as before (2, 10, 50) so the random draws are consumed in the
# same sequence.
for (N in c(2, 10, 50)) {
  plot_sampling_distribution(population, N)
}


```