# What is the association between GDP per capita and life expectancy? ----
library(gapminder)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.5 ✓ dplyr 1.0.3
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Scatter plot of life expectancy against GDP per capita, using every row of
# gapminder (no filtering by year).
gapminder %>%
  ggplot(mapping = aes(gdpPercap, lifeExp)) +
  geom_point()
# Same scatter restricted to 2007, with ggplot2's default smoother on top.
# The smoothing method is deliberately left unspecified so ggplot2 picks it.
ggplot(
  data = filter(gapminder, year == 2007),
  mapping = aes(gdpPercap, lifeExp)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Keep only the 2007 rows; the model fits on gm2007 use this subset.
gm2007 <- filter(gapminder, year == 2007)

# Simple linear regression of life expectancy on GDP per capita.
model_lm <- lm(formula = lifeExp ~ gdpPercap, data = gm2007)
summary(model_lm)
##
## Call:
## lm(formula = lifeExp ~ gdpPercap, data = gm2007)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.828 -6.316 1.922 6.898 13.128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.957e+01 1.010e+00 58.95 <2e-16 ***
## gdpPercap 6.371e-04 5.827e-05 10.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.899 on 140 degrees of freedom
## Multiple R-squared: 0.4606, Adjusted R-squared: 0.4567
## F-statistic: 119.5 on 1 and 140 DF, p-value: < 2.2e-16
library(broom)
# Coefficient table of the linear model as a tibble (one row per term).
model_lm %>% tidy()
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 59.6 1.01 59.0 9.89e-101
## 2 gdpPercap 0.000637 0.0000583 10.9 1.69e- 20
# One-row tibble of model-level summaries (R-squared, sigma, AIC, ...).
model_lm %>% glance()
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.461 0.457 8.90 120. 1.69e-20 1 -511. 1028. 1037.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# Observed points with the linear model's fitted values drawn as a line.
# The line layer inherits x from the plot mapping and only overrides y.
model_lm %>%
  augment() %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_line(aes(y = .fitted))
# Loess fit on the 2007 data: local polynomials of degree 2 with span 0.5.
model_loess <- loess(
  lifeExp ~ gdpPercap,
  data = gm2007,
  span = 0.5,
  degree = 2
)
# Observed points with the loess fitted values drawn as a line.
# The line layer inherits x from the plot mapping and only overrides y.
model_loess %>%
  augment() %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_line(aes(y = .fitted))
# Quadratic regression of life expectancy on GDP per capita.
# poly(gdpPercap, 2) already contains the linear component, so a separate
# `gdpPercap` term would be perfectly collinear with it and lm() would report
# an aliased (NA) coefficient. Only the poly() term is kept.
model_lm2 <- lm(lifeExp ~ poly(gdpPercap, 2), data = gm2007)

# The model frame holds gdpPercap only inside the poly() matrix column, so the
# original data is passed to augment() to get a plain gdpPercap column to plot.
augment(model_lm2, data = gm2007) %>%
  ggplot(aes(gdpPercap, lifeExp)) +
  geom_point() +
  geom_line(aes(gdpPercap, .fitted))
# Fit one loess model per span value, augment each fit, tag its rows with the
# span that produced them, and row-bind everything into one data frame.
# NOTE(review): this fits on the full `gapminder` data, whereas the other
# models in this script use `gm2007` -- confirm that is intended.
c(.2, .7) %>%
  map_dfr(function(span_value) {
    loess(lifeExp ~ gdpPercap, span = span_value, data = gapminder) %>%
      augment() %>%
      mutate(span = span_value)
  })