Candy rankings

library(tidyverse)
library(broom)
library(fivethirtyeight) # for data
# Show the first rows of the candy data set.
print(candy_rankings)

## # A tibble: 85 x 13
##    competitorname chocolate fruity caramel peanutyalmondy nougat
##    <chr>          <lgl>     <lgl>  <lgl>   <lgl>          <lgl> 
##  1 100 Grand      TRUE      FALSE  TRUE    FALSE          FALSE 
##  2 3 Musketeers   TRUE      FALSE  FALSE   FALSE          TRUE  
##  3 One dime       FALSE     FALSE  FALSE   FALSE          FALSE 
##  4 One quarter    FALSE     FALSE  FALSE   FALSE          FALSE 
##  5 Air Heads      FALSE     TRUE   FALSE   FALSE          FALSE 
##  6 Almond Joy     TRUE      FALSE  FALSE   TRUE           FALSE 
##  7 Baby Ruth      TRUE      FALSE  TRUE    TRUE           TRUE  
##  8 Boston Baked … FALSE     FALSE  FALSE   TRUE           FALSE 
##  9 Candy Corn     FALSE     FALSE  FALSE   FALSE          FALSE 
## 10 Caramel Apple… FALSE     TRUE   TRUE    FALSE          FALSE 
## # … with 75 more rows, and 7 more variables: crispedricewafer <lgl>,
## #   hard <lgl>, bar <lgl>, pluribus <lgl>, sugarpercent <dbl>,
## #   pricepercent <dbl>, winpercent <dbl>

# Scatter plot of winpercent against pricepercent.
candy_rankings %>%
  ggplot(aes(pricepercent, winpercent)) +
  geom_point()

# Simple linear regression of winpercent on pricepercent, followed by its
# coefficient table.
model_lm <- lm(formula = winpercent ~ pricepercent, data = candy_rankings)
model_lm %>%
  tidy()

## # A tibble: 2 x 5
##   term         estimate std.error statistic  p.value
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)      42.0      2.91     14.4  2.39e-24
## 2 pricepercent     17.8      5.30      3.35 1.21e- 3

# Observed points with the fitted regression line drawn through them.
ggplot(augment(model_lm), aes(pricepercent, winpercent)) +
  geom_point() +
  geom_line(aes(y = .fitted))

# Refit with the chocolate indicator as a second predictor and show the
# coefficient table.
model_lm <- lm(
  formula = winpercent ~ pricepercent + chocolate,
  data = candy_rankings
)
model_lm %>%
  tidy()

## # A tibble: 3 x 5
##   term          estimate std.error statistic  p.value
##   <chr>            <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)      41.6       2.40    17.3   4.35e-29
## 2 pricepercent      1.66      5.08     0.328 7.44e- 1
## 3 chocolateTRUE    18.3       2.91     6.29  1.46e- 8

# Same scatter plot, with chocolate mapped to colour, point shape and line
# type, so the fitted values are drawn as one line per chocolate group.
ggplot(
  augment(model_lm),
  aes(
    pricepercent, winpercent,
    color = chocolate,
    shape = chocolate,
    linetype = chocolate
  )
) +
  geom_point() +
  geom_line(aes(y = .fitted))

# Keep a shorter alias for the data set.
candy <- candy_rankings
# Attach plotly for the interactive 3D plots built with plot_ly() below.
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# 3D scatter of winpercent over the (pricepercent, sugarpercent) plane.
# No `mode` is supplied, so plotly picks markers itself and reports that
# choice when the plot is printed.
candy3d <- candy_rankings %>%
  plot_ly(
    x = ~pricepercent,
    y = ~sugarpercent,
    z = ~winpercent,
    type = "scatter3d"
  )
candy3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

# Regress winpercent on both pricepercent and sugarpercent.
model_lm <- lm(winpercent ~ pricepercent + sugarpercent, data = candy_rankings)

# Grid over [0, 1] x [0, 1] in steps of 0.01. expand.grid() varies its first
# column fastest, so pricepercent cycles through 0..1 within each
# sugarpercent value.
xy_plane <- expand.grid(0:100, 0:100) / 100

ps_plane <- xy_plane %>%
  rename(pricepercent = Var1,
         sugarpercent = Var2)

# Predicted winpercent at every grid point.
lm_plane <- augment(model_lm, newdata = ps_plane)

# plotly surfaces read z[i, j] as the height at (x[j], y[i]): rows follow y
# (sugarpercent) and columns follow x (pricepercent). Filling by row places
# each run of 101 pricepercent values into one row, which gives exactly that
# orientation. The default column-wise fill would draw the plane with the
# price and sugar axes swapped.
lm_matrix <- matrix(lm_plane$.fitted, nrow = 101, ncol = 101, byrow = TRUE)

# Add the fitted regression plane to the 3D scatter plot.
candy3d %>%
  add_surface(
          x = ~(0:100)/100,
          y = ~(0:100)/100,
          z = ~lm_matrix)

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

# Coefficient table for the current model_lm fit.
model_lm %>%
  tidy()

## # A tibble: 3 x 5
##   term         estimate std.error statistic  p.value
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)     39.8       3.44     11.6  6.33e-19
## 2 pricepercent    15.6       5.60      2.78 6.72e- 3
## 3 sugarpercent     6.73      5.66      1.19 2.38e- 1

# 3D marker plot of winpercent over (pricepercent, sugarpercent), with the
# chocolate indicator mapped to both marker colour and marker symbol.
chocolate3d <- candy_rankings %>%
  plot_ly(
    type = "scatter3d",
    x = ~pricepercent,
    y = ~sugarpercent,
    z = ~winpercent,
    mode = "markers",
    color = ~chocolate,
    colors = c("#2d708e", "#d8576b"),
    symbol = ~chocolate,
    symbols = c("o", "circle"),
    showlegend = FALSE
  )
chocolate3d

candy <- candy_rankings
# Regress winpercent on pricepercent, sugarpercent and the chocolate indicator.
model_lm <- lm(winpercent ~ pricepercent + sugarpercent + chocolate, data = candy)

# Predict over the (pricepercent, sugarpercent) grid with chocolate = TRUE.
ps_plane <- ps_plane %>%
  mutate(chocolate = TRUE)

lm_plane <- augment(model_lm, newdata = ps_plane)

# plotly surfaces read z[i, j] as the height at (x[j], y[i]), so rows must
# follow y (sugarpercent) and columns x (pricepercent). In the grid,
# pricepercent varies fastest, so the fitted values are filled by row; the
# default column-wise fill would swap the two axes of the plane.
lm_matrix_true <- matrix(lm_plane$.fitted, nrow = 101, ncol = 101,
                         byrow = TRUE)

# Same grid with chocolate = FALSE.
ps_plane <- ps_plane %>%
  mutate(chocolate = FALSE)

lm_plane <- augment(model_lm, newdata = ps_plane)

lm_matrix_false <- matrix(lm_plane$.fitted, nrow = 101, ncol = 101,
                          byrow = TRUE)

# Draw both fitted planes over the scatter plot. inherit = FALSE stops the
# surfaces from picking up the attributes set in the plot_ly() call that
# built chocolate3d.
chocolate3d %>%
  add_surface(
    x = ~(0:100)/100,
    y = ~(0:100)/100,
    z = ~lm_matrix_true,
    showscale = FALSE,
    inherit = FALSE,
    colorscale = list(c(0, 1), c("#f0f921", "#7201a8"))) %>%
  add_surface(
    x = ~(0:100)/100,
    y = ~(0:100)/100,
    z = ~lm_matrix_false,
    showscale = FALSE,
    inherit = FALSE,
    colorscale = list(c(0, 1), c("#3cbb75", "#481567")))

# Coefficient table for the current model_lm fit.
model_lm %>%
  tidy()

## # A tibble: 4 x 5
##   term          estimate std.error statistic  p.value
##   <chr>            <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)      38.6       2.81    13.8   6.50e-23
## 2 pricepercent     -1.66      5.27    -0.315 7.54e- 1
## 3 sugarpercent      9.04      4.63     1.95  5.42e- 2
## 4 chocolateTRUE    18.7       2.87     6.53  5.40e- 9

# Regress winpercent on every other column once competitorname has been
# dropped, then show the coefficient table.
model_lm_all <- lm(
  formula = winpercent ~ .,
  data = candy_rankings %>% select(-competitorname)
)
model_lm_all %>%
  tidy()

## # A tibble: 12 x 5
##    term                 estimate std.error statistic  p.value
##    <chr>                   <dbl>     <dbl>     <dbl>    <dbl>
##  1 (Intercept)            34.5        4.32    7.99   1.44e-11
##  2 chocolateTRUE          19.7        3.90    5.07   2.96e- 6
##  3 fruityTRUE              9.42       3.76    2.50   1.45e- 2
##  4 caramelTRUE             2.22       3.66    0.608  5.45e- 1
##  5 peanutyalmondyTRUE     10.1        3.62    2.79   6.81e- 3
##  6 nougatTRUE              0.804      5.72    0.141  8.88e- 1
##  7 crispedricewaferTRUE    8.92       5.27    1.69   9.47e- 2
##  8 hardTRUE               -6.17       3.46   -1.78   7.85e- 2
##  9 barTRUE                 0.442      5.06    0.0872 9.31e- 1
## 10 pluribusTRUE           -0.854      3.04   -0.281  7.79e- 1
## 11 sugarpercent            9.09       4.66    1.95   5.50e- 2
## 12 pricepercent           -5.93       5.51   -1.08   2.86e- 1

# Stack the one-row fit summaries of the two models for side-by-side comparison.
bind_rows(glance(model_lm), glance(model_lm_all))

## # A tibble: 2 x 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.433         0.412  11.3     20.6  5.20e-10     3  -325.  659.  671.
## 2     0.540         0.471  10.7      7.80 9.50e- 9    11  -316.  657.  689.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Candy rankings

Joshua Loftus

1/19/2021