---
title: "lecture12"
output: html_document
---

```{r}
# Fix the RNG seed so the simulated scatterplots and covariances in the
# chunks below are identical on every knit.
set.seed(12)

# Positive Covariance: y increases with x, plus Gaussian noise (sd = 5)
x <- 1:100
y <- 1:100 + rnorm(100, sd = 5)
plot(x, y)
# Reference lines at the two means split the plot into four quadrants
abline(h = mean(y))
abline(v = mean(x))
cov(x, y)
```

```{r}
# Negative Covariance: y decreases as x increases, plus Gaussian noise
x <- 1:100
y <- 100:1 + rnorm(100, sd = 5)
plot(x, y)
abline(h = mean(y))
abline(v = mean(x))
cov(x, y)
```

```{r}
# Close-to-zero Covariance: x and y are drawn independently
x <- rnorm(100, sd = 5)
y <- rnorm(100, sd = 5)
plot(x, y)
abline(h = mean(y))
abline(v = mean(x))
cov(x, y)
```

```{r}
# Covariance is sensitive to units: the same salaries expressed in cents
# give a covariance that is 100 times larger.
age <- c(20, 24, 25, 27, 30, 35)
salary_euro <- c(500, 1000, 1000, 1500, 2500, 3000)
salary_cent <- salary_euro * 100
cov(age, salary_euro)
cov(age, salary_cent)
```

```{r}
# Pearson's Correlation Coefficient: covariance divided by the product of
# the standard deviations, which removes the dependence on units.
cov(age, salary_euro) / (sd(age) * sd(salary_euro))
cov(age, salary_cent) / (sd(age) * sd(salary_cent))

# Alternative: Standardization first, then take the covariance of z-scores
age_z <- (age - mean(age)) / sd(age)
salary_euro_z <- (salary_euro - mean(salary_euro)) / sd(salary_euro)
salary_cent_z <- (salary_cent - mean(salary_cent)) / sd(salary_cent)
cov(age_z, salary_euro_z)
cov(age_z, salary_cent_z)

# cor
cor(age, salary_euro)
cor(age, salary_cent)
```

```{r}
# Experiment: Pearson's Correlation
exp1_age <- c(20, 21, 30, 31, 45, 50)
exp_time <- c(100, 100, 120, 130, 130, 200)

# Two-Tailed Test
cor.test(exp1_age, exp_time)

# One-Tailed Test r > 0
cor.test(exp1_age, exp_time, alternative = "greater")

# One-Tailed Test r < 0
cor.test(exp1_age, exp_time, alternative = "less")
```

```{r}
# z-Test as alternative to t-Test for correlation
# Age-Time-Experiment
r_age_time <- cor(exp1_age, exp_time)
n_obs <- length(exp1_age)

# Fisher z-transformation of r (numerically the same as atanh(r))
w <- 1 / 2 * log((1 + r_age_time) / (1 - r_age_time))

# Divide by the standard error sqrt(1 / (n - 3)); n is taken from the data
# instead of being hard-coded.
z <- w / sqrt(1 / (n_obs - 3))
z

# Two-tailed p-value. abs() keeps the result in [0, 1] even when z is
# negative (the plain (1 - pnorm(z)) * 2 would exceed 1 in that case).
2 * pnorm(abs(z), lower.tail = FALSE)
```

```{r}
# Point-biserial Correlation
traffic <- c(1, 0, 1, 0, 1, 0, 1, 0)
crossing <- c(5, 2, 10, 1, 8, 3, 9, 2)

# Two Groups: select by the value of `traffic`, not by position, so the
# split stays correct if the observations are reordered.
traffic_0 <- crossing[traffic == 0]
traffic_1 <- crossing[traffic == 1]

# Scatterplot and point-biserial r (rbp)
plot(crossing ~ traffic, data = data.frame(traffic, crossing))
abline(h = mean(crossing))
abline(v = mean(traffic))

n_0 <- length(traffic_0)
n_1 <- length(traffic_1)
n_total <- length(crossing)

# sd() uses the n - 1 denominator, hence n * (n - 1) under the root
rbp <- sqrt(n_0 * n_1 / (n_total * (n_total - 1))) *
  (mean(traffic_1) - mean(traffic_0)) / sd(crossing)

# t statistic for rbp with n - 2 degrees of freedom
tbp <- rbp * sqrt((n_total - 2) / (1 - rbp^2))
tbp
cor.test(crossing, traffic)

# Comparison to t-Test
boxplot(traffic_0, traffic_1, names = c("traffic = 0", "traffic = 1"))
t.test(traffic_1, traffic_0, var.equal = TRUE)
# alternative notation: t.test(crossing ~ traffic, var.equal = TRUE)
# (the formula version compares group 0 minus group 1, so t has the
# opposite sign; without var.equal = TRUE it runs Welch's test instead)
```

```{r}
# Spearman's Correlation: Example
util <- c(2, 1, 5, 4, 3)
like <- c(4, 4, 2, 1, 3)

# `like` contains a tie, so an exact p-value cannot be computed; asking for
# the asymptotic p-value explicitly gives the same result without the
# "Cannot compute exact p-value with ties" warning.
cor.test(util, like, alternative = "less", method = "spearman",
         exact = FALSE)
```