---
title: "lecture12"
output: html_document
---
```{r}
# Positive covariance: y increases with x, disturbed by Gaussian noise.
x <- seq_len(100)
y <- x + rnorm(n = 100, sd = 5)
plot(x, y)
# Reference lines through both means split the scatterplot into quadrants.
abline(h = mean(y), v = mean(x))
cov(x, y)
```
```{r}
# Negative covariance: y decreases as x increases, disturbed by Gaussian noise.
x <- seq_len(100)
y <- rev(x) + rnorm(n = 100, sd = 5)
plot(x, y)
# Reference lines through both means split the scatterplot into quadrants.
abline(h = mean(y), v = mean(x))
cov(x, y)
```
```{r}
# Close-to-zero covariance: x and y are drawn independently of each other.
x <- rnorm(n = 100, mean = 0, sd = 5)
y <- rnorm(n = 100, mean = 0, sd = 5)
plot(x, y)
# Reference lines through both means split the scatterplot into quadrants.
abline(h = mean(y), v = mean(x))
cov(x, y)
```
```{r}
# Covariance depends on the measurement units: the same salaries expressed in
# cents instead of euros scale the covariance by the same factor of 100.
age <- c(20, 24, 25, 27, 30, 35)
salary_euro <- c(500, 1000, 1000, 1500, 2500, 3000)
salary_cent <- 100 * salary_euro
cov(x = age, y = salary_euro)
cov(x = age, y = salary_cent)
```
```{r}
# Pearson's correlation coefficient: the covariance divided by the product of
# both standard deviations, so the result no longer depends on the units.
cov(x = age, y = salary_euro) / (sd(age) * sd(salary_euro))
cov(x = age, y = salary_cent) / (sd(age) * sd(salary_cent))
# Alternative: z-standardize each variable first, then take the covariance.
age_z <- (age - mean(age)) / sd(age)
salary_euro_z <- (salary_euro - mean(salary_euro)) / sd(salary_euro)
salary_cent_z <- (salary_cent - mean(salary_cent)) / sd(salary_cent)
cov(x = age_z, y = salary_euro_z)
cov(x = age_z, y = salary_cent_z)
# Built-in: cor() yields the coefficient directly.
cor(x = age, y = salary_euro)
cor(x = age, y = salary_cent)
```
```{r}
# Experiment: Pearson's correlation (age vs. time)
exp1_age <- c(20, 21, 30, 31, 45, 50)
exp_time <- c(100, 100, 120, 130, 130, 200)
# Two-tailed test (alternative: correlation differs from 0)
cor.test(exp1_age, exp_time, alternative = "two.sided", method = "pearson")
# One-tailed test (alternative: r > 0)
cor.test(exp1_age, exp_time, alternative = "greater", method = "pearson")
# One-tailed test (alternative: r < 0)
cor.test(exp1_age, exp_time, alternative = "less", method = "pearson")
```
```{r}
# z-test as an alternative to the t-test for a correlation, based on Fisher's
# z-transformation. Applied to the age/time experiment.
# The correlation and the sample size are taken from the data instead of being
# hard-coded, so the chunk stays correct if the data change.
r_obs <- cor(exp1_age, exp_time)
n_obs <- length(exp1_age)
# Fisher transformation: atanh(r) is 1/2 * log((1 + r) / (1 - r)).
w <- atanh(r_obs)
# Under H0 (rho = 0) w is approximately normal with
# standard error sqrt(1 / (n - 3)).
z <- w / sqrt(1 / (n_obs - 3))
z
# Two-sided p-value; -abs(z) keeps it valid when the correlation is negative
# (the one-sided form (1 - pnorm(z)) * 2 would exceed 1 in that case).
2 * pnorm(-abs(z))
```
```{r}
# Point-biserial correlation: Pearson's r between a dichotomous variable
# (traffic, coded 0/1) and a metric variable (crossing).
traffic <- c(1, 0, 1, 0, 1, 0, 1, 0)
crossing <- c(5, 2, 10, 1, 8, 3, 9, 2)
# Two groups, selected by the indicator value rather than by position, so the
# split stays correct even if the observations are not strictly alternating.
traffic_0 <- crossing[traffic == 0]
traffic_1 <- crossing[traffic == 1]
# Scatterplot with reference lines through both means
plot(crossing ~ traffic, data = data.frame(traffic, crossing))
abline(h = mean(crossing))
abline(v = mean(traffic))
# Group sizes and total n are derived from the data instead of hard-coded.
n_0 <- length(traffic_0)
n_1 <- length(traffic_1)
n_total <- n_0 + n_1
# r_pb = sqrt(n0 * n1 / (n * (n - 1))) * (M1 - M0) / s, where s is the sample
# standard deviation (denominator n - 1) of all crossing values.
rbp <- sqrt(n_0 * n_1 / (n_total * (n_total - 1))) *
  (mean(traffic_1) - mean(traffic_0)) / sd(crossing)
# t statistic with n - 2 degrees of freedom
tbp <- rbp * sqrt((n_total - 2) / (1 - rbp^2))
tbp
# cor.test() reports the same r and t.
cor.test(crossing, traffic)
# Comparison to the t-test: the pooled-variance two-sample t-test yields the
# same t statistic.
boxplot(traffic_0, traffic_1)
t.test(traffic_1, traffic_0, var.equal = TRUE)
# alternative notation: t.test(crossing ~ traffic, var.equal = TRUE)
# (groups are then ordered 0, 1, so the sign of t is flipped)
```
```{r}
# Spearman's rank correlation: example with a one-tailed test (rho < 0).
util <- c(2, 1, 5, 4, 3)
like <- c(4, 4, 2, 1, 3)
# `like` contains a tie (4, 4), so an exact p-value cannot be computed.
# exact = FALSE requests the approximation explicitly; by default cor.test()
# falls back to the same approximation but emits a warning.
spearman_test <- cor.test(
  util, like,
  alternative = "less",
  method = "spearman",
  exact = FALSE
)
spearman_test
```