library(tidyverse)
library(broom)
library(fivethirtyeight) # for data
# Print the candy_rankings tibble to inspect its columns and first rows.
candy_rankings
## # A tibble: 85 x 13
## competitorname chocolate fruity caramel peanutyalmondy nougat
## <chr> <lgl> <lgl> <lgl> <lgl> <lgl>
## 1 100 Grand TRUE FALSE TRUE FALSE FALSE
## 2 3 Musketeers TRUE FALSE FALSE FALSE TRUE
## 3 One dime FALSE FALSE FALSE FALSE FALSE
## 4 One quarter FALSE FALSE FALSE FALSE FALSE
## 5 Air Heads FALSE TRUE FALSE FALSE FALSE
## 6 Almond Joy TRUE FALSE FALSE TRUE FALSE
## 7 Baby Ruth TRUE FALSE TRUE TRUE TRUE
## 8 Boston Baked … FALSE FALSE FALSE TRUE FALSE
## 9 Candy Corn FALSE FALSE FALSE FALSE FALSE
## 10 Caramel Apple… FALSE TRUE TRUE FALSE FALSE
## # … with 75 more rows, and 7 more variables: crispedricewafer <lgl>,
## # hard <lgl>, bar <lgl>, pluribus <lgl>, sugarpercent <dbl>,
## # pricepercent <dbl>, winpercent <dbl>
# Scatter plot of winpercent against pricepercent.
candy_rankings %>%
  ggplot(aes(x = pricepercent, y = winpercent)) +
  geom_point()
# Simple linear regression of winpercent on pricepercent, followed by a
# tidy table of the coefficient estimates.
model_lm <- lm(winpercent ~ pricepercent, data = candy_rankings)
model_lm %>%
  tidy()
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 42.0 2.91 14.4 2.39e-24
## 2 pricepercent 17.8 5.30 3.35 1.21e- 3
# Overlay the fitted regression line on the observed points.
ggplot(augment(model_lm), aes(x = pricepercent, y = winpercent)) +
  geom_point() +
  geom_line(aes(y = .fitted))
# Add the logical chocolate indicator as a second predictor and show the
# coefficient table.
model_lm <- lm(winpercent ~ pricepercent + chocolate, data = candy_rankings)
model_lm %>%
  tidy()
## # A tibble: 3 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 41.6 2.40 17.3 4.35e-29
## 2 pricepercent 1.66 5.08 0.328 7.44e- 1
## 3 chocolateTRUE 18.3 2.91 6.29 1.46e- 8
# Plot observations and fitted lines, distinguishing the two chocolate
# levels by color, point shape, and line type.
ggplot(
  augment(model_lm),
  aes(
    x = pricepercent,
    y = winpercent,
    color = chocolate,
    shape = chocolate,
    linetype = chocolate
  )
) +
  geom_point() +
  geom_line(aes(y = .fitted))
# Shorter alias for the candy_rankings data set.
candy <- candy_rankings
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive 3D scatter plot with pricepercent on x, sugarpercent on y,
# and winpercent on z. The widget is stored so that surfaces can be added
# to it later, then displayed.
candy3d <- candy_rankings %>%
  plot_ly(
    x = ~pricepercent,
    y = ~sugarpercent,
    z = ~winpercent,
    type = "scatter3d"
  )
candy3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
# Regress winpercent on pricepercent and sugarpercent, then draw the fitted
# plane on top of the 3D scatter plot.
model_lm <- lm(winpercent ~ pricepercent + sugarpercent, data = candy_rankings)

# 101 x 101 grid of (pricepercent, sugarpercent) pairs covering [0, 1]^2.
xy_plane <- expand.grid(0:100, 0:100) / 100
ps_plane <- xy_plane %>%
  rename(
    pricepercent = Var1,
    sugarpercent = Var2
  )
lm_plane <- augment(model_lm, newdata = ps_plane)

# expand.grid() varies its first column (pricepercent) fastest, while a
# plotly surface reads the rows of z as y (sugarpercent) and the columns as
# x (pricepercent). Filling by row puts each fitted value at the grid point
# it was predicted for; the default column-major fill would transpose the
# plane.
lm_matrix <- matrix(
  lm_plane$.fitted,
  nrow = 101,
  ncol = 101,
  byrow = TRUE
)

candy3d %>%
  add_surface(
    x = ~(0:100)/100,
    y = ~(0:100)/100,
    z = ~lm_matrix
  )
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
# Coefficient table for the current model_lm fit.
model_lm %>%
  tidy()
## # A tibble: 3 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 39.8 3.44 11.6 6.33e-19
## 2 pricepercent 15.6 5.60 2.78 6.72e- 3
## 3 sugarpercent 6.73 5.66 1.19 2.38e- 1
# 3D scatter plot of the same three variables, with marker color and
# symbol both mapped to the logical chocolate column. The legend is hidden.
chocolate3d <- candy_rankings %>%
  plot_ly(
    x = ~pricepercent,
    y = ~sugarpercent,
    z = ~winpercent,
    type = "scatter3d",
    mode = "markers",
    color = ~chocolate,
    colors = c("#2d708e", "#d8576b"),
    symbol = ~chocolate,
    symbols = c("o", "circle"),
    showlegend = FALSE
  )
chocolate3d
# Fit winpercent on pricepercent, sugarpercent, and chocolate, then draw one
# fitted plane per chocolate level on top of the 3D scatter plot.
candy <- candy_rankings
model_lm <- lm(
  winpercent ~ pricepercent + sugarpercent + chocolate,
  data = candy
)

# The grid in ps_plane lists pricepercent varying fastest, while a plotly
# surface reads the rows of z as y (sugarpercent) and the columns as x
# (pricepercent). Both matrices below are therefore filled by row; the
# default column-major fill would transpose the planes.

# Fitted values over the grid with chocolate = TRUE.
ps_plane <- ps_plane %>%
  mutate(chocolate = TRUE)
lm_plane <- augment(model_lm, newdata = ps_plane)
lm_matrix_true <- matrix(
  lm_plane$.fitted,
  nrow = 101,
  ncol = 101,
  byrow = TRUE
)

# Fitted values over the grid with chocolate = FALSE.
ps_plane <- ps_plane %>%
  mutate(chocolate = FALSE)
lm_plane <- augment(model_lm, newdata = ps_plane)
lm_matrix_false <- matrix(
  lm_plane$.fitted,
  nrow = 101,
  ncol = 101,
  byrow = TRUE
)

# inherit = FALSE keeps the surfaces from picking up the scatter trace's
# attributes; each plane gets its own two-color scale and no color bar.
chocolate3d %>%
  add_surface(
    x = ~(0:100)/100,
    y = ~(0:100)/100,
    z = ~lm_matrix_true,
    showscale = FALSE,
    inherit = FALSE,
    colorscale = list(c(0, 1), c("#f0f921", "#7201a8"))
  ) %>%
  add_surface(
    x = ~(0:100)/100,
    y = ~(0:100)/100,
    z = ~lm_matrix_false,
    showscale = FALSE,
    inherit = FALSE,
    colorscale = list(c(0, 1), c("#3cbb75", "#481567"))
  )
# Coefficient table for the current model_lm fit.
model_lm %>%
  tidy()
## # A tibble: 4 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 38.6 2.81 13.8 6.50e-23
## 2 pricepercent -1.66 5.27 -0.315 7.54e- 1
## 3 sugarpercent 9.04 4.63 1.95 5.42e- 2
## 4 chocolateTRUE 18.7 2.87 6.53 5.40e- 9
# Regress winpercent on every other column. competitorname (the character
# column of candy names) is dropped first so it is not used as a predictor.
model_lm_all <- lm(
  winpercent ~ .,
  data = select(candy_rankings, -competitorname)
)
model_lm_all %>%
  tidy()
## # A tibble: 12 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 34.5 4.32 7.99 1.44e-11
## 2 chocolateTRUE 19.7 3.90 5.07 2.96e- 6
## 3 fruityTRUE 9.42 3.76 2.50 1.45e- 2
## 4 caramelTRUE 2.22 3.66 0.608 5.45e- 1
## 5 peanutyalmondyTRUE 10.1 3.62 2.79 6.81e- 3
## 6 nougatTRUE 0.804 5.72 0.141 8.88e- 1
## 7 crispedricewaferTRUE 8.92 5.27 1.69 9.47e- 2
## 8 hardTRUE -6.17 3.46 -1.78 7.85e- 2
## 9 barTRUE 0.442 5.06 0.0872 9.31e- 1
## 10 pluribusTRUE -0.854 3.04 -0.281 7.79e- 1
## 11 sugarpercent 9.09 4.66 1.95 5.50e- 2
## 12 pricepercent -5.93 5.51 -1.08 2.86e- 1
# Stack the one-row fit summaries of model_lm and model_lm_all so their
# fit statistics can be compared side by side.
bind_rows(glance(model_lm), glance(model_lm_all))
## # A tibble: 2 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.433 0.412 11.3 20.6 5.20e-10 3 -325. 659. 671.
## 2 0.540 0.471 10.7 7.80 9.50e- 9 11 -316. 657. 689.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>