UO: ggplot2 p.2
These are my notes from the University of Oregon’s Data Service Consultants workshop on ggplot2, part 2.
Cameron Mulder ran this workshop. He can be contacted via their consultation page here.
# library(ggplot2)
# install all of tidyverse
# install.packages("tidyverse")
# loads the entire tidyverse, which includes ggplot2, dplyr, and so on
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
ggplot2 review
ggplot(data= <DATA>, mapping = aes(<MAPPING>))+ <GEOM FUNCTION>()
# Scatter plot of hwy against displ from the mpg data, following the
# data -> aesthetic mapping -> geom template.
mpg %>%
  ggplot(aes(displ, hwy)) +
  geom_point()

# The same scatter plot with base R graphics; columns are picked out with $.
plot(x = mpg$displ, y = mpg$hwy)

Note: the $ operator is how we identify the specific variable (column) of a data frame that we want to work with, e.g. mpg$displ.
Line Graph (Base R)
# Dataset documentation: ?pressure (shorthand for help(pressure)).
# Start with a line plot of pressure against temperature.
plot(x = pressure$temperature, y = pressure$pressure, type = "l")
# Overlay a point at each observation.
points(x = pressure$temperature, y = pressure$pressure)
# Overlay a second series at half the pressure, in red, as a line plus points.
lines(x = pressure$temperature, y = pressure$pressure / 2, col = "red")
points(x = pressure$temperature, y = pressure$pressure / 2, col = "red")

Line Graph (ggplot)
# Line graph of pressure against temperature with the observations marked
# as points; both layers share the mapping given to ggplot().
pressure %>%
  ggplot(mapping = aes(temperature, pressure)) +
  geom_line() +
  geom_point()

Bar Graphs (base R)
# BOD (Biochemical Oxygen Demand) dataset: one bar per row, with the bar
# height taken from demand and the bar label taken from Time.
barplot(height = BOD$demand, names.arg = BOD$Time)

# mtcars: tabulate how many rows there are for each value of cyl.
table(mtcars[["cyl"]])
##
## 4 6 8
## 11 7 14
# Bar chart of the cyl counts: one bar per distinct cyl value.
barplot(height = table(mtcars$cyl))

Bar Graphs (ggplot)
# Time would otherwise be treated as a continuous variable, so it is wrapped
# in factor() to get a discrete x-axis; geom_col() takes the bar height from
# the y aesthetic (demand).
BOD %>%
  ggplot(mapping = aes(factor(Time), demand)) +
  geom_col()

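Notice that there is no 6 on the x-axis: BOD has no observation at Time = 6, and because factor() makes Time discrete, only the values that actually occur are shown (no gap is left for the missing one).
Note: geom_bar() sets the bar height to the number of rows in each group (a count), whereas geom_col() sets the bar height to a value taken from the data (the y aesthetic).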
# No y aesthetic is needed here: with geom_bar() the y value is the count of
# rows for each cyl value.
mtcars %>%
  ggplot(mapping = aes(cyl)) +
  geom_bar()

Histogram (Base R)
# Histogram of mpg with hist()'s default breaks.
hist(x = mtcars$mpg)
# The same histogram again, this time passing breaks = 4.
hist(x = mtcars$mpg, breaks = 4)

Histogram (ggplot)
# Histogram of mpg with bins that are 4 units wide.
mtcars %>%
  ggplot(mapping = aes(mpg)) +
  geom_histogram(binwidth = 4)

Boxplot (base r)
# Dataset documentation: ?ToothGrowth
# Plot len against supp by passing the two columns directly.
plot(x = ToothGrowth$supp, y = ToothGrowth$len)

# The same plot written with the formula interface: response ~ predictor,
# with the columns looked up in `data`.
plot(len ~ supp, data = ToothGrowth)

# Two terms on the right-hand side: len against supp and len against dose.
plot(len ~ supp + dose, data = ToothGrowth)

Challenge 1
Create a boxplot of tooth growth with ggplot2, using the len and supp variables from the ToothGrowth data set.
Hint: geom_boxplot
# Challenge 1 solution: boxplot of len for each level of supp.
ToothGrowth %>%
  ggplot(mapping = aes(supp, len)) +
  geom_boxplot()

Time Series
ggplot will automatically recognize the variable as a date as long as the variable is imported as a date.
# Dummy daily series: 365 consecutive days counting back from 2017-06-14,
# each paired with a random value drawn by runif().
data <- data.frame(
  day = as.Date("2017-06-14") - (seq_len(365) - 1L),
  value = runif(n = 365)
)
# Preview the first rows.
head(x = data)
## day value
## 1 2017-06-14 0.9300607
## 2 2017-06-13 0.5911243
## 3 2017-06-12 0.4875981
## 4 2017-06-11 0.4971915
## 5 2017-06-10 0.5670779
## 6 2017-06-09 0.4948416
# Line chart of value over day; the plot object is stored in p and then
# displayed by evaluating p.
p <- ggplot(data = data, mapping = aes(day, value)) +
  geom_line()
p

Now let's use the economics data set.
# Dataset documentation: ?economics
# Line chart of pop over date.
economics %>%
  ggplot(mapping = aes(date, pop)) +
  geom_line()

# Keep only the rows dated after 2006-01-01, then chart pop over date for
# that window.
# NOTE(review): the variable name `subset` is also the name of the base R
# function subset(); consider a more specific name.
subset <- filter(economics, date > as.Date("2006-1-1"))
ggplot(data = subset, mapping = aes(date, pop)) +
  geom_line()

# Chart a second variable over time on the same line: pop over date, drawn
# in red, with the line size mapped to unemploy.
economics %>%
  ggplot(mapping = aes(date, pop)) +
  geom_line(mapping = aes(size = unemploy), colour = "red")

The thickness of the line is based on the value of unemploy (the number of unemployed people, in thousands).
# psavert is the personal savings rate
# uempmed is the median duration of unemployment
# Reshape from wide to long: keep date plus the two measures, then stack the
# two measure columns so each row holds one (date, variable, values) triple.
# The argument is spelled `cols` in full; `col` only worked through R's
# partial argument matching.
econ_long <- economics %>%
  select(date, psavert, uempmed) %>%
  pivot_longer(
    cols = c("psavert", "uempmed"),
    names_to = "variable",
    values_to = "values"
  )
# Preview the first rows of the reshaped data.
head(econ_long)
## # A tibble: 6 × 3
## date variable values
## <date> <chr> <dbl>
## 1 1967-07-01 psavert 12.6
## 2 1967-07-01 uempmed 4.5
## 3 1967-08-01 psavert 12.6
## 4 1967-08-01 uempmed 4.7
## 5 1967-09-01 psavert 11.9
## 6 1967-09-01 uempmed 4.6
# One line per variable in the long data, told apart by colour, with a
# fixed line size of 1.
econ_long %>%
  ggplot(mapping = aes(date, values)) +
  geom_line(mapping = aes(colour = variable), size = 1)

R is a great tool for plotting time series.
Maps!!
Polygon maps
# County-level map data for Michigan, reduced to four columns and with
# long renamed to lon and subregion renamed to id.
mi_counties <- select(
  map_data(map = "county", region = "michigan"),
  lon = long, lat, group, id = subregion
)
# Preview the first rows.
head(mi_counties)
## lon lat group id
## 1 -83.88675 44.85686 1 alcona
## 2 -83.36536 44.86832 1 alcona
## 3 -83.36536 44.86832 1 alcona
## 4 -83.33098 44.83968 1 alcona
## 5 -83.30806 44.80530 1 alcona
## 6 -83.30233 44.77665 1 alcona
# Map 1: plot every boundary vertex as a point, using coord_quickmap() as
# the coordinate system.
mi_counties %>%
  ggplot(mapping = aes(x = lon, y = lat)) +
  geom_point() +
  coord_quickmap()

# Nicer map (still not that common): join the vertices of each group into a
# polygon with a white fill and a grey outline.
mi_counties %>%
  ggplot(mapping = aes(x = lon, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "grey") +
  coord_quickmap()

Simple Features
# Australian maps
# install.packages('ozmaps')
library(ozmaps)
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
# Use :: to pull an object from a specific package by name; here
# ozmap_states is taken from the ozmaps package.
oz_stats <- ozmaps::ozmap_states
# Evaluating the name on its own prints the object.
oz_stats
## Simple feature collection with 9 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 105.5507 ymin: -43.63203 xmax: 167.9969 ymax: -9.229287
## Geodetic CRS: GDA94
## # A tibble: 9 × 2
## NAME geometry
## * <chr> <MULTIPOLYGON [°]>
## 1 New South Wales (((150.7016 -35.12286, 150.6611 -35.11782, 150.6…
## 2 Victoria (((146.6196 -38.70196, 146.6721 -38.70259, 146.6…
## 3 Queensland (((148.8473 -20.3457, 148.8722 -20.37575, 148.85…
## 4 South Australia (((137.3481 -34.48242, 137.3749 -34.46885, 137.3…
## 5 Western Australia (((126.3868 -14.01168, 126.3625 -13.98264, 126.3…
## 6 Tasmania (((147.8397 -40.29844, 147.8902 -40.30258, 147.8…
## 7 Northern Territory (((136.3669 -13.84237, 136.3339 -13.83922, 136.3…
## 8 Australian Capital Territory (((149.2317 -35.222, 149.2346 -35.24047, 149.271…
## 9 Other Territories (((167.9333 -29.05421, 167.9188 -29.0344, 167.93…
# Map of Australia: draw the geometries stored in oz_stats.
oz_stats %>%
  ggplot() +
  geom_sf() +
  coord_sf()

# install.packages('rmapshaper')
library(rmapshaper)
## Registered S3 method overwritten by 'geojsonlint':
## method from
## print.location dplyr
# State outlines, leaving out the "Other Territories" row.
oz_stats <- filter(ozmaps::ozmap_states, NAME != "Other Territories")
# Simplified version of ozmaps::abs_ced.
# NOTE(review): presumably electoral division boundaries, given the variable
# name - confirm in ?ozmaps::abs_ced.
oz_votes <- ozmaps::abs_ced %>% rmapshaper::ms_simplify()
# Map: states filled by NAME, with the simplified boundaries drawn on top
# without a fill. Each layer supplies its own data.
ggplot() +
  geom_sf(mapping = aes(fill = NAME), data = oz_stats) +
  geom_sf(fill = NA, data = oz_votes) +
  coord_sf()

Plotly
# install.packages("plotly")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive plot of Petal.Length against Sepal.Length from iris; columns
# are referenced with one-sided formulas (~column).
fig <- iris %>%
  plot_ly(x = ~Sepal.Length, y = ~Petal.Length)
fig
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
# Interactive plot of hwy against displ, coloured by class.
# The y variable must be a column of the data: mpg has no column called
# `mpg`, so hwy is used, matching the other mpg plots in these notes.
mpg %>% plot_ly(x = ~displ, y = ~hwy, color = ~class)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
You can double click on the legend to see a subset of the data.
# Static ggplot to convert to plotly: hwy against displ, points coloured by
# class, with a smoother fitted through all of the points. Stored in `plot`
# and then displayed.
plot <- ggplot(data = mpg, mapping = aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth()
plot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Convert the stored ggplot object into an interactive plotly figure.
plot %>% ggplotly()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Art
# install.packages("flametree")
library(flametree)
# Colour palette for the artwork.
shades <- c("blue", "green", "red", "orange")
# Generate the tree data, then draw it on a white background with the chosen
# palette and the "nativeflora" style.
data <- flametree_grow(time = 10, trees = 10)
flametree_plot(
  data,
  background = "white",
  palette = shades,
  style = "nativeflora"
)

Package by Danielle Navarro. Check out her art here.
Next week we are going over R Markdown (Rmd).