This R Notebook is the complement to my blog post Visualizing One Million NCAA Basketball Shots.

This notebook is licensed under the MIT License. If you use the code or data visualization designs contained within this notebook, it would be greatly appreciated if proper attribution is given back to this notebook and/or myself. Thanks! :)

1 Setup

library(tidyverse)
── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1.9000     ✔ purrr   0.2.4     
✔ tibble  1.4.2          ✔ dplyr   0.7.4     
✔ tidyr   0.8.0          ✔ stringr 1.3.0     
✔ readr   1.1.1          ✔ forcats 0.3.0     
── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(scales)

Attaching package: ‘scales’

The following object is masked from ‘package:purrr’:

    discard

The following object is masked from ‘package:readr’:

    col_factor
library(viridis)
Loading required package: viridisLite

Attaching package: ‘viridis’

The following object is masked from ‘package:scales’:

    viridis_pal
# Special thanks to Ewen Gallic for his implementation of a ggplot2 basketball court
# http://egallic.fr/en/drawing-a-basketball-court-with-r/
source("bb_court_college.R")

Attaching package: ‘gridExtra’

The following object is masked from ‘package:dplyr’:

    combine

Scale for 'x' is already present. Adding another scale for 'x', which
will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which
will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x', which
will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which
will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which
will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x', which
will replace the existing scale.
sessionInfo()
R version 3.4.4 (2018-03-15)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.3

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] gtable_0.2.0       gridExtra_2.3      viridis_0.5.0     
 [4] viridisLite_0.3.0  scales_0.5.0       forcats_0.3.0     
 [7] stringr_1.3.0      dplyr_0.7.4        purrr_0.2.4       
[10] readr_1.1.1        tidyr_0.8.0        tibble_1.4.2      
[13] ggplot2_2.2.1.9000 tidyverse_1.2.1   

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.16     cellranger_1.1.0 pillar_1.2.1     compiler_3.4.4  
 [5] plyr_1.8.4       bindr_0.1.1      tools_3.4.4      jsonlite_1.5    
 [9] lubridate_1.7.3  nlme_3.1-131.1   lattice_0.20-35  pkgconfig_2.0.1 
[13] rlang_0.2.0      psych_1.7.8      cli_1.0.0        rstudioapi_0.7  
[17] yaml_2.1.18      parallel_3.4.4   haven_1.1.1      bindrcpp_0.2    
[21] xml2_1.2.0       httr_1.3.1       knitr_1.20       hms_0.4.2       
[25] glue_1.2.0       R6_2.2.2         readxl_1.0.0     foreign_0.8-69  
[29] modelr_0.1.1     reshape2_1.4.3   magrittr_1.5     rvest_0.3.2     
[33] assertthat_0.2.0 mnormt_1.5-5     colorspace_1.3-2 stringi_1.1.7   
[37] lazyeval_0.2.1   munsell_0.4.3    broom_0.4.3      crayon_1.3.4    
# Install the notebook-wide default ggplot2 theme: theme_minimal() in
# Source Sans Pro, overridden with smaller title/axis/legend text, gray
# subtitle and caption text, and narrow legend keys.
theme_set(
  theme_minimal(base_size = 9, base_family = "Source Sans Pro") +
    theme(
      plot.title = element_text(
        family = "Source Sans Pro Bold",
        size = 8,
        margin = margin(t = -0.1, b = 0.1, unit = "cm")
      ),
      plot.subtitle = element_text(
        family = "Source Sans Pro Semibold",
        color = "#969696",
        size = 6
      ),
      plot.caption = element_text(size = 6, color = "#969696"),
      axis.title.x = element_text(size = 8),
      axis.title.y = element_text(size = 8),
      legend.text = element_text(size = 6),
      legend.key.width = unit(0.25, units = "cm")
    )
)
# Theme overrides added to the court plots: hides axis titles, axis text and
# grid lines, and places a wide, short legend above the panel.
bb_theme <- theme(
  plot.title = element_text(
    size = 10,
    family = "Source Sans Pro Bold",
    margin = margin(t = -0.1, b = 0.0, unit = "cm")
  ),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  panel.grid = element_blank(),
  legend.position = "top",
  legend.text = element_text(size = 6),
  legend.title = element_text(size = 6),
  legend.key.width = unit(1, units = "cm"),
  legend.key.height = unit(0.25, units = "cm"),
  # margin() takes one value per side (t, r, b, l). Passing a single
  # c(0, 0, -0.4, 0) vector fills only `t` and yields a 7-element unit once
  # the r/b/l defaults are appended, so each side is named explicitly here.
  # The negative bottom margin pulls the legend closer to the plot.
  legend.margin = margin(t = 0, r = 0, b = -0.4, l = 0, unit = "cm")
)

BigQuery:

#standardSQL
-- One output row per integer (x, y) coordinate among rows that have a
-- shot_type and both event coordinates, dated before 2018-03-15.
SELECT
  CAST(event_coord_x AS INT64) AS x,
  -- Mirror the y coordinate about 600.
  600 - CAST(event_coord_y AS INT64) AS y,
  COUNT(*) AS attempts,
  -- A row counts as a success when points_scored is non-NULL.
  COUNTIF(points_scored IS NOT NULL) AS successes,
  -- Rows with NULL points_scored contribute 0 points to the average.
  AVG(IFNULL(CAST(points_scored AS INT64), 0)) AS avg_points
FROM
  `bigquery-public-data.ncaa_basketball.mbb_pbp_sr`
WHERE
  shot_type IS NOT NULL
  AND event_coord_x IS NOT NULL
  AND event_coord_y IS NOT NULL
  AND scheduled_date < '2018-03-15'
GROUP BY
  x, y
ORDER BY
  attempts DESC,
  avg_points DESC
# Load the per-coordinate shot counts (presumably the exported result of the
# BigQuery query above; the parsed columns match its SELECT list).
file_path <- "court.csv"
# `progress` is a TRUE/FALSE flag; NA is not a documented value for it.
# FALSE explicitly keeps the progress bar out of the notebook output.
df <- read_csv(file_path, progress = FALSE)
Parsed with column specification:
cols(
  x = col_integer(),
  y = col_integer(),
  attempts = col_integer(),
  successes = col_integer(),
  avg_points = col_double()
)
# Preview the raw rows before transforming them.
head(df)

# Linearly map the coordinates onto x in [0, 94] and y in [-50, 0].
# No `from` is given, so scales::rescale() uses each column's observed range.
# Also add the per-coordinate make rate.
df <- mutate(
  df,
  x = rescale(x, to = c(0, 94)),
  y = rescale(y, to = c(-50, 0)),
  perc_success = successes / attempts
)

# Preview the transformed rows.
head(df)
# Heat map of raw (non-log) shot-attempt counts layered over the court plot
# `P_180`, which is defined outside this chunk (presumably by the sourced
# bb_court_college.R), then saved as a 6x4 PNG.
plot <- P_180 +
  # Sum `attempts` within each 2-D bin; binwidth is c(x width, y width).
  stat_summary_2d(
    data = df,
    aes(x = x, y = y, z = attempts),
    fun = sum,
    binwidth = c(25 / (2 * 25), 50 / (2 * 47)),
    alpha = 0.8
  ) +
  scale_fill_viridis(option = "inferno", end = 1, labels = comma) +
  labs(
    # The title embeds the comma-formatted total number of attempts.
    title = sprintf(
      "Heat Map of %s Basketball Shots from NCAA Games",
      comma(sum(df$attempts))
    ),
    subtitle = "Starting with the 2013-14 season. Via Sportradar data in BigQuery",
    fill = "# of 2pt/3pt Shot Attempts\nMade From Spot",
    # The em dash is written as an escape so the caption survives
    # re-encoding of this file; the literal had been corrupted to mojibake.
    caption = "Max Woolf \u2014 minimaxir.com"
  ) +
  bb_theme
ggsave("ncaa_count_attempts_unlog.png", plot, width = 6, height = 4)