---
title: "lecture12"
output: html_document
---

```{r}
# Positive covariance: y rises with x (linear trend plus Gaussian noise)

x <- 1:100
y <- x + rnorm(length(x), sd = 5)

# Scatterplot with reference lines through the means of both variables
plot(x, y)
abline(h = mean(y), v = mean(x))

cov(x, y)
```

```{r}
# Negative covariance: y falls as x rises (reversed trend plus Gaussian noise)

x <- 1:100
y <- rev(x) + rnorm(length(x), sd = 5)

# Scatterplot with reference lines through the means of both variables
plot(x, y)
abline(h = mean(y), v = mean(x))

cov(x, y)
```

```{r}
# Covariance close to zero: x and y are drawn independently of each other

x <- rnorm(n = 100, mean = 0, sd = 5)
y <- rnorm(n = 100, mean = 0, sd = 5)

# Scatterplot with reference lines through the means of both variables
plot(x, y)
abline(h = mean(y), v = mean(x))

cov(x, y)
```

```{r}
# The covariance changes with the measurement unit:
# the same salaries expressed in cents instead of euros

age <- c(20, 24, 25, 27, 30, 35)
salary_euro <- c(500, 1000, 1000, 1500, 2500, 3000)
salary_cent <- 100 * salary_euro

cov(x = age, y = salary_euro)
cov(x = age, y = salary_cent)
```

```{r}
# Pearson's correlation coefficient: the covariance divided by the product
# of both standard deviations

cov(age, salary_euro) / (sd(age) * sd(salary_euro))
cov(age, salary_cent) / (sd(age) * sd(salary_cent))

# Alternative: z-standardize each variable first, then take the covariance

standardize <- function(v) (v - mean(v)) / sd(v)

age_z <- standardize(age)
salary_euro_z <- standardize(salary_euro)
salary_cent_z <- standardize(salary_cent)
cov(age_z, salary_euro_z)
cov(age_z, salary_cent_z)

# Built-in function
cor(age, salary_euro)
cor(age, salary_cent)
```

```{r}
# Experiment: significance tests for Pearson's correlation

exp1_age <- c(20, 21, 30, 31, 45, 50)
exp_time <- c(100, 100, 120, 130, 130, 200)

# Two-tailed test (alternative: r != 0)

cor.test(exp1_age, exp_time, alternative = "two.sided")

# One-tailed test (alternative: r > 0)

cor.test(exp1_age, exp_time, alternative = "greater")

# One-tailed test (alternative: r < 0)

cor.test(exp1_age, exp_time, alternative = "less")
```


```{r}
# z-Test as alternative to t-Test for correlation (Fisher z-transformation)

# Age-Time-Experiment
# atanh(r) is Fisher's transformation 1/2 * log((1 + r) / (1 - r))
w <- atanh(cor(exp1_age, exp_time))
# Standard error of w is sqrt(1 / (n - 3)); n is taken from the data
z <- w / sqrt(1 / (length(exp1_age) - 3))
z
# Two-tailed p-value; abs() keeps it valid for negative correlations as well
2 * pnorm(abs(z), lower.tail = FALSE)
```


```{r}
# Point-biserial correlation: Pearson correlation between a dichotomous
# variable (traffic, coded 0/1) and a metric variable (crossing)

traffic <- c(1, 0, 1, 0, 1, 0, 1, 0)
crossing <- c(5, 2, 10, 1, 8, 3, 9, 2)

# Two groups, selected by the value of traffic rather than by position, so
# the split does not depend on the observations alternating

traffic_0 <- crossing[traffic == 0]
traffic_1 <- crossing[traffic == 1]


# Scatterplot and r_bp

plot(crossing ~ traffic, data = data.frame(traffic, crossing))
abline(h = mean(crossing))
abline(v = mean(traffic))

# Group sizes and total sample size are derived from the data
n_0 <- length(traffic_0)
n_1 <- length(traffic_1)
n_total <- length(crossing)

# r_bp = sqrt(n_0 * n_1 / (n * (n - 1))) * (mean_1 - mean_0) / sd(crossing)
rbp <- sqrt(n_0 * n_1 / (n_total * (n_total - 1))) *
  (mean(traffic_1) - mean(traffic_0)) / sd(crossing)
# t statistic of r_bp with n - 2 degrees of freedom
tbp <- rbp * sqrt((n_total - 2) / (1 - rbp^2))
tbp

cor.test(crossing, traffic)

# Comparison to t-Test

boxplot(traffic_0, traffic_1)

t.test(traffic_1, traffic_0, var.equal = TRUE)
# alternative notation: t.test(crossing ~ traffic, var.equal = TRUE)
# (the formula interface compares group 0 with group 1, so t changes sign)
```


```{r}
# Spearman's rank correlation: example with a one-tailed test (rho < 0)

util <- c(2, 1, 5, 4, 3)
like <- c(4, 4, 2, 1, 3)

# like contains a tie (4, 4), so an exact p-value cannot be computed;
# exact = FALSE requests the asymptotic approximation explicitly and avoids
# the "Cannot compute exact p-value with ties" warning
cor.test(util, like, alternative = "less", method = "spearman", exact = FALSE)
```