This R Notebook is the complement to my blog post Benchmarking TensorFlow on CPUs: More Cost-Effective Deep Learning than GPUs.

This notebook is licensed under the MIT License. If you use the code or data visualization designs contained within this notebook, it would be greatly appreciated if proper attribution is given back to this notebook and/or myself. Thanks! :)

library(readr)
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(ggplot2)
library(scales)

Attaching package: ‘scales’

The following object is masked from ‘package:readr’:

    col_factor
library(tidyr)
library(RColorBrewer)
sessionInfo()
R version 3.4.0 (2017-04-21)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.5

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] RColorBrewer_1.1-2 tidyr_0.6.3        scales_0.4.1       ggplot2_2.2.1.9000
[5] dplyr_0.7.0        readr_1.1.1       

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.11     assertthat_0.2.0 plyr_1.8.4       grid_3.4.0      
 [5] R6_2.2.2         gtable_0.2.0     magrittr_1.5     rlang_0.1.1     
 [9] lazyeval_0.2.0   tools_3.4.0      glue_1.1.0       munsell_0.4.3   
[13] hms_0.3          compiler_3.4.0   colorspace_1.3-2 knitr_1.16      
[17] tibble_1.3.3    

Set ggplot2 theme.

# Base notebook theme: minimal, small type, Source Sans Pro family.
notebook_theme_base <- theme_minimal(base_size = 9, base_family = "Source Sans Pro")

# Chart-specific tweaks layered on top of the base theme.
notebook_theme_overrides <- theme(
  plot.title = element_text(size = 11, family = "Source Sans Pro Bold"),
  axis.title.x = element_blank(),
  axis.title.y = element_text(family = "Source Sans Pro Semibold"),
  plot.caption = element_text(size = 6, color = "#969696"),
  axis.text.x = element_text(angle = 45, vjust = 0.75, size = 7),
  legend.position = "none"
)

# Register the combined theme as the default for all subsequent plots.
theme_set(notebook_theme_base + notebook_theme_overrides)
# Format numeric values as relative multipliers, e.g. 1.25 -> "1.25x".
# Vectorized over x; used as a ggplot2 axis-label formatter.
# (Fix: the original assigned the result to a local `lab` as its last
# expression, relying on assignment invisibly returning its value.)
relative <- function(x) {
  paste0(sprintf("%.2f", x), 'x')
}

Set colors according to Brewer palettes for consistent lightness. Ignore first color of palettes since it is too bright.

# Darkest red for the GPU bar; for CPUs, take shades 2-5 of each palette
# (dropping the lightest) and reverse so the darkest comes first.
color_gpu <- brewer.pal(5, "Reds")[5]
colors_pip <- rev(brewer.pal(5, "Blues")[2:5])
colors_compiled <- rev(brewer.pal(5, "Greens")[2:5])
# Interleave pip/compiled shades after the GPU color:
# gpu, pip1, cmp1, pip2, cmp2, ... (column-major read of a 2x4 matrix).
colors_all <- c(color_gpu, as.vector(rbind(colors_pip, colors_compiled)))

Set known price rates from Google Compute Engine Pricing.

# Hourly GPU instance rate (USD/hour) from Google Compute Engine Pricing.
gpu_cost_hr <- 0.745
# Hourly rate (USD/hour) for the 8-vCPU instance.
cpu8_cost_hr <- 0.060
# Fractional surcharge applied to CPU rates below.
# NOTE(review): presumably the Skylake-class CPU premium (~6%) — confirm
# against current GCE pricing.
skylake_premium <- 0.0607

Derive the remaining rates, in seconds.

# Convert hourly rates to per-second rates. CPU pricing scales linearly:
# each doubling of vCPUs doubles the price.
seconds_per_hour <- 3600
gpu_cost_s <- gpu_cost_hr / seconds_per_hour
cpu8_cost_s <- cpu8_cost_hr * (1 + skylake_premium) / seconds_per_hour
cpu16_cost_s <- 2 * cpu8_cost_s
cpu32_cost_s <- 2 * cpu16_cost_s
cpu64_cost_s <- 2 * cpu32_cost_s
# Named vector used as a platform -> cost lookup (works like a Python dict).
costs <- c(
  gpu = gpu_cost_s,
  cpu8 = cpu8_cost_s,
  cpu16 = cpu16_cost_s,
  cpu32 = cpu32_cost_s,
  cpu64 = cpu64_cost_s
)

1 Analysis

Create a helper function to return the results for all permutations of a given test file name.

# Benchmark run permutations: TensorFlow build types and CPU platforms.
tf_types <- c("cpu-compiled", "cpu-pip")
tf_platforms <- c("cpu8", "cpu16", "cpu32", "cpu64")
# Factor levels ordering the chart bars: GPU first, then CPUs from
# largest to smallest, pip before compiled.
labels <- c('gpu', 'cpu64pip', 'cpu64cmp', 'cpu32pip', 'cpu32cmp',
            'cpu16pip', 'cpu16cmp', 'cpu8pip', 'cpu8cmp')

# Read one benchmark log and summarize it to a single row: total training
# time (sum of per-epoch `elapsed`, seconds) and its total cost in USD.
summarize_run <- function(path, run_type, run_platform) {
  read_csv(path) %>%
    mutate(type = run_type, platform = run_platform) %>%
    group_by(type, platform) %>%
    summarize(total_time = sum(elapsed),
              total_cost = total_time * costs[[run_platform]])
}

# Return the summarized results for all permutations (GPU plus every
# type/platform combination) of a given test file name, with time and
# cost normalized relative to the GPU run (GPU rows become 1.00x).
process_test_data <- function(file_name) {
  # GPU baseline run.
  results <- summarize_run(sprintf("../logs/gpu/%s", file_name), "gpu", "gpu")

  gpu_total_time <- results %>% pull(total_time)
  gpu_total_cost <- results %>% pull(total_cost)

  # Summarize every CPU permutation into a list, then bind once
  # (avoids growing a data frame inside the loop).
  cpu_runs <- list()
  for (tf_type in tf_types) {
    for (tf_platform in tf_platforms) {
      path <- sprintf("../logs/%s/%s/%s", tf_type, tf_platform, file_name)
      cpu_runs[[paste(tf_type, tf_platform, sep = "/")]] <-
        summarize_run(path, tf_type, tf_platform)
    }
  }
  results <- bind_rows(results, cpu_runs)

  # Normalize against the GPU baseline.
  results <- results %>%
    mutate(total_time_norm = total_time / gpu_total_time,
           total_cost_norm = total_cost / gpu_total_cost)

  # Build display labels ("gpu", "cpu8pip", "cpu64cmp", ...) and order
  # them with the predefined factor levels.
  results %>%
    mutate(label = paste0(
      ifelse(platform == "gpu", '', platform),
      ifelse(type == "cpu-compiled", "cmp",
             substr(type, nchar(type) - 2, nchar(type))))) %>%
    ungroup() %>%
    mutate(label = factor(label, levels = labels)) %>%
    select(label, total_time, total_cost, total_time_norm, total_cost_norm)
}
# Preview the summarized CIFAR-10 CNN benchmark results (prints the tibble).
process_test_data('cifar10_cnn_tensorflow.csv')
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)

1.1 IMDB Bidirectional LSTM

# Summarize all runs of the IMDB bidirectional LSTM benchmark.
df_imdb_lstm <- process_test_data("imdb_bidirectional_lstm_tensorflow.csv")
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
Parsed with column specification:
cols(
  epoch = col_integer(),
  elapsed = col_double(),
  loss = col_double(),
  acc = col_double(),
  val_loss = col_double(),
  val_acc = col_double()
)
# Display the summarized IMDB LSTM results.
df_imdb_lstm

# Bar chart of training time per platform relative to the GPU baseline;
# the dashed line marks the GPU reference (1.00x).
plot <- ggplot(df_imdb_lstm, aes(x = label, y = total_time_norm, fill = label)) +
  # geom_col() is the idiomatic equivalent of geom_bar(stat = "identity").
  geom_col() +
  geom_hline(yintercept = 1, linetype = "dashed", color = "#1a1a1a") +
  scale_y_continuous(labels = relative) +
  scale_fill_manual(values = colors_all) +
  labs(title = "Training Time on CPUs w/ Bidirectional LSTMs",
       x = "Platform",
       y = "Total Model Training Time Relative to GPU",
       caption = "Max Woolf — minimaxir.com")
ggsave("dl-cpu-gpu-1.png", plot, width = 4, height = 3)