Portland R User Group: Aggregate making tables with gt package

24/2/2022 35-minute read

Ted Landeras led this event for Portland R user Group where we watched Rich Lannone|| Making Beautiful Tables with {gt}|| RStudio as a group on Youtube, and then went through some other examples.

Additional Links:

Portland R User Group

Code of Conduct

library(gt)
## Warning: package 'gt' was built under R version 4.0.5
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## Warning: package 'dplyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(palmerpenguins)

use markdown with md()

pixel size px()

penguins %>%
  group_by(species) %>%
  summarize_at(
    .vars = c(
      "bill_length_mm", 
      "bill_depth_mm",
      "flipper_length_mm",
      "body_mass_g"
      ),
            .funs = ~ mean(.,na.rm = TRUE)) %>%
  gt() %>%
  tab_header(
    title = md("The penguins dataset"),
    subtitle = md("**Three** years of data on penguins on _three_ islands.")
  ) %>%
  cols_label(
    bill_length_mm = md("Bill Length,<br>mm"),
    bill_depth_mm = md("Bill Depth,<br>mm"),
    flipper_length_mm = md("Flipper Length,<br>mm"),
    body_mass_g = md("Body Mass,<br>g")
  ) %>%
  opt_align_table_header(align = "left") %>% 
  fmt_number(columns = bill_length_mm, decimals = 2) %>%
  fmt_number(columns = bill_depth_mm, decimals = 2) %>%
  fmt_number(columns = flipper_length_mm, decimals = 2) %>%
  fmt_number(columns = body_mass_g, scale_by = 1/1000) %>%
  cols_width(
    bill_length_mm ~ px(120),
    bill_depth_mm ~ px(120),
    flipper_length_mm ~ px(120),
    body_mass_g ~ px(120),
    everything() ~ px(100)
  ) %>% 
  tab_source_note(source_note = md("Dataset is from the `palmerpenguins` **R** package.")
                  ) %>%
  # tab_footnote(
  #   footnote = "The Largest of the three penguins studied.",
  #   locations = cells_stub(rows = "Gentoo")
  # )
  tab_footnote(
    footnote = "Flipper Length was measured with a tape measure.",
    locations = cells_column_labels(columns = flipper_length_mm)
  ) %>%
  tab_footnote(
    footnote = "Tap measure suffered some frost damage.",
    locations = list(
      cells_column_labels(columns = flipper_length_mm),
      cells_body(columns = flipper_length_mm)
  )
  ) %>%
  # tab_style(
  #   locations = cells_body(
  #     columns = c("bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g")
  #     ),
  #   style = list(
  #     cell_fill(color = "steelblue"),
  #     cell_text(color = "white")
  #     )
  # ) %>%
  data_color(
    columns = c(bill_depth_mm),
    colors = scales::col_numeric(
      palett = c("red", "orange", "brown"),
      domain = c(10,20)) 
  ) %>%
    data_color(
    columns = c(bill_length_mm),
    colors = scales::col_numeric(
      palett = c("red", "orange", "green"),
      domain = c(30,50)) 
  ) %>%
  tab_stubhead(label = md("Penguin *Species*")) %>%
  opt_table_font(font = google_font("Montserrat"), 
                 weight = 600, 
                 style = "italic")
The penguins dataset
Three years of data on penguins on three islands.
species Bill Length,
mm
Bill Depth,
mm
Flipper Length,
mm1,2
Body Mass,
g
Adelie 38.79 18.35 189.952 3.70
Chinstrap 48.83 18.42 195.822 3.73
Gentoo 47.50 14.98 217.192 5.08
Dataset is from the palmerpenguins R package.
1 Flipper Length was measured with a tape measure.
2 Tap measure suffered some frost damage.

Practice

What can {gt} do for you?

The following are taken from a number of different examples. Where possible, I’ve tried to notate the source.

{gt} parts reference

url <- "gt_parts_of_a_table.svg"
knitr::include_graphics(url)

Allison Horst’s Examples

These examples are from: https://www.allisonhorst.com/post/2020-03-02-gt-tables-examples/

Example 1

head(rock)
##   area    peri     shape perm
## 1 4990 2791.90 0.0903296  6.3
## 2 7002 3892.60 0.1486220  6.3
## 3 7558 3930.66 0.1833120  6.3
## 4 7352 3869.32 0.1170630  6.3
## 5 7943 3948.54 0.1224170 17.1
## 6 7979 4010.15 0.1670450 17.1
rock %>% # Get 'rock' data
  head(5) %>% # First 5 lines only
  gt() # Make a table, it just works.
area peri shape perm
4990 2791.90 0.0903296 6.3
7002 3892.60 0.1486220 6.3
7558 3930.66 0.1833120 6.3
7352 3869.32 0.1170630 6.3
7943 3948.54 0.1224170 17.1

Example 1

head(BOD)
##   Time demand
## 1    1    8.3
## 2    2   10.3
## 3    3   19.0
## 4    4   16.0
## 5    5   15.6
## 6    7   19.8
BOD %>% # Get the data...
gt() %>% # use 'gt' to make an awesome table...
  tab_header( 
    title = "BOD Table Woooooo!", # ...with this title
    subtitle = "Hooray gt!") %>% # and this subtitle
  fmt_number( # A column (numeric data)
    columns = vars(Time), # What column variable? BOD$Time
    decimals = 2 # With two decimal places
    ) %>% 
  fmt_number( # Another column (also numeric data)
    columns = vars(demand), # What column variable? BOD$demand
    decimals = 1 # I want this column to have one decimal place
  ) %>% 
  cols_label(Time = "Time (hours)", demand = "Demand (mg/L)") # Update labels
## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead
BOD Table Woooooo!
Hooray gt!
Time (hours) Demand (mg/L)
1.00 8.3
2.00 10.3
3.00 19.0
4.00 16.0
5.00 15.6
7.00 19.8

Example 2

tooth_length <- ToothGrowth %>% 
  group_by(supp, dose) %>% 
  summarize(
    mean_len = mean(len)
  ) %>% 
  as_tibble() 
## `summarise()` has grouped output by 'supp'. You can override using the
## `.groups` argument.
# A gt table: 
tooth_length %>% # Take tooth_length
  gt() %>% # Make a gt table with it
  tab_header(
    title = "A title just like that", # Add a title
    subtitle = "(with something below it!)" # And a subtitle
  ) %>%
  fmt_passthrough( # Not sure about this but it works...
    columns = vars(supp) # First column: supp (character)
  ) %>% 
  fmt_number(
    columns = vars(mean_len), # Second column: mean_len (numeric)
    decimals = 2 # With 4 decimal places
  ) %>% 
  fmt_number(
    columns = vars(dose), # Third column: dose (numeric)
    decimals = 2 # With 2 decimal places
  ) %>% 
  data_color( # Update cell colors...
    columns = vars(supp), # ...for supp column!
    colors = scales::col_factor( # <- bc it's a factor
      palette = c(
        "green","cyan"), # Two factor levels, two colors
      domain = c("OJ","VC")# Levels
  )
  ) %>% 
  data_color( # Update cell colors...
    columns = vars(dose), # ...for dose column 
    colors = scales::col_numeric( # <- bc it's numeric
      palette = c(
        "yellow","orange"), # A color scheme (gradient)
      domain = c(0.5,2) # Column scale endpoints
  )
  ) %>% 
  data_color( # Update cell colors...
    columns = vars(mean_len), # ...for mean_len column
    colors = scales::col_numeric(
      palette = c(
        "red", "purple"), # Overboard colors! 
      domain = c(7,27) # Column scale endpoints
  )
  ) %>% 
  cols_label(supp = "Supplement", dose = "Dosage (mg/d)", mean_len = "Mean Tooth Length") %>% # Make the column headers
  tab_footnote(
    footnote = "Baby footnote test", # This is the footnote text
    locations = cells_column_labels(
      columns = vars(supp) # Associated with column 'supp'
      )
    ) %>% 
    tab_footnote(
    footnote = "A second footnote", # Another line of footnote text
    locations = cells_column_labels( 
      columns = vars(dose) # Associated with column 'dose'
      )
    )
## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead

## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead
A title just like that
(with something below it!)
Supplement1 Dosage (mg/d)2 Mean Tooth Length
OJ 0.50 13.23
OJ 1.00 22.70
OJ 2.00 26.06
VC 0.50 7.98
VC 1.00 16.77
VC 2.00 26.14
1 Baby footnote test
2 A second footnote

Animal Crossing (by Ted)

This is an example using the Animal Crossing dataset from Tidy Tuesday. In this dataset, you’ll see that the items column is a URL.

items <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-05/items.csv')
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 4565 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): id, name, category, sell_currency, buy_currency, sources, recipe_i...
## dbl  (4): num_id, sell_value, buy_value, recipe
## lgl  (2): orderable, customizable
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(items)
## # A tibble: 6 × 16
##   num_id id          name  category orderable sell_value sell_currency buy_value
##    <dbl> <chr>       <chr> <chr>    <lgl>          <dbl> <chr>             <dbl>
## 1     12 3d-glasses  3D G… Accesso… NA               122 bells               490
## 2     14 a-tee       A Tee Tops     NA               140 bells               560
## 3     17 abstract-w… Abst… Wallpap… TRUE             390 bells              1560
## 4     19 academy-un… Acad… Dresses  NA               520 bells              2080
## 5     20 acanthoste… Acan… Fossils  FALSE           2000 bells                NA
## 6     21 accessorie… Acce… Furnitu… TRUE             375 bells              1500
## # … with 8 more variables: buy_currency <chr>, sources <chr>,
## #   customizable <lgl>, recipe <dbl>, recipe_id <chr>, games_id <chr>,
## #   id_full <chr>, image_url <chr>

We can display

items %>% 
  top_n(10, buy_value) %>%
  arrange(desc(buy_value)) %>%
  select(name, sell_value, buy_value, category, image=image_url) %>%
  gt() %>%
   text_transform(
     #tell gt that the image column 
    locations = cells_body(vars(image)),
    fn = function(x) {
      web_image(
        url = x,
        height = 50
      )
    }
  )
## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead
name sell_value buy_value category image
Royal Crown 300000 1200000 Hats
Crown 250000 1000000 Hats
Gold Armor 80000 320000 Dresses
Golden Casket 80000 320000 Furniture
Grand Piano 65000 260000 Furniture
Golden Toilet 60000 240000 Furniture
Blue Steel Staircase NA 228000 Furniture
Iron Bridge NA 228000 Furniture
Red Steel Staircase NA 228000 Furniture
Red Zen Bridge NA 228000 Furniture
Zen Bridge NA 228000 Furniture

Tom Mock’s example with {gtExtras}

Tom Mock has created a package called {gtExtras} which can be installed with the following command:

library(gtExtras)
remotes::install_github("jthomasmock/gtExtras")
## Skipping install of 'gtExtras' from a github remote, the SHA1 (9649be4b) has not changed since last install.
##   Use `force = TRUE` to force installation

This example is from one of Tom’s tweets. Gist is here: https://gist.github.com/jthomasmock/923604deb65682b0364c9220a85ddb36

stream_data <- readr::read_csv("stream_data.csv")
## Rows: 9 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): distributor, type
## dbl (3): Ratio, Nominee, Winner
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stream_data
## # A tibble: 9 × 5
##   distributor type   Ratio Nominee Winner
##   <chr>       <chr>  <dbl>   <dbl>  <dbl>
## 1 Netflix     wifi  0.277      224     62
## 2 HBO         video 0.288      156     45
## 3 NBC         tv    0.205       78     16
## 4 ABC         tv    0.16        50      8
## 5 Hulu        wifi  0.02        50      1
## 6 FX Networks video 0.111       45      5
## 7 Prime Video wifi  0.0889      45      4
## 8 Apple TV+   wifi  0.238       42     10
## 9 CBS         tv    0.025       40      1
gt(stream_data) %>% 
  #transform Nominee and Winner columns into a bullet plot
  gt_plt_bullet(column = Nominee, target = Winner) %>% 
  #take the type column and retrieve a font awesome symbol
  gt_fa_column(column = type) %>% 
  #format table with nytimes style formating
  gt_theme_nytimes() %>% 
  #format ratio column with percentage
  fmt_symbol_first(column = Ratio, suffix = "%", decimals = 1, scale_by = 100)
distributor type Ratio Nominee
Netflix
WiFi
27.7%
HBO
Video
28.8 
NBC
Television
20.5 
ABC
Television
16.0 
Hulu
WiFi
2.0 
FX Networks
Video
11.1 
Prime Video
WiFi
8.9 
Apple TV+
WiFi
23.8 
CBS
Television
2.5 

{gtsummary} package

Example from: https://cran.r-project.org/web/packages/gtsummary/vignettes/gallery.html

For biostatisticians, one of the most annoying tasks to build is a Table 1: a breakdown of research subjects by various demographics. The {gtsummary} package simplifies building these tables.

trial %>%
  select(trt, age, grade) %>%
  tbl_summary(
    by = trt, 
    missing = "no",
    statistic = all_continuous() ~ "{median} ({p25}, {p75})"
  ) %>%
  modify_header(all_stat_cols() ~ "**{level}**<br>N =  {n} ({style_percent(p)}%)") %>%
  add_n() %>%
  bold_labels() %>%
  modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
Characteristic N Chemotherapy Treatment
Drug A
N = 98 (49%)1
Drug B
N = 102 (51%)1
Age 189 46 (37, 59) 48 (39, 56)
Grade 200
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
1 Median (IQR); n (%)