# Custom plot theme: theme_minimal() with overrides for the axis line,
# background, grid lines, margins, title/caption text, and a compact legend.
#
# Takes no arguments and returns the combined ggplot2 theme object.
first_custom <- function() {
  # Background, margins, axis line and grid; minor grid lines are removed.
  surface <- theme(
    plot.background = element_rect(fill = "grey95"),
    plot.margin = margin(15, 15, 15, 15),
    axis.line = element_line(color = "gray0"),
    panel.grid.major = element_line(linewidth = 0.5, color = "gray80"),
    panel.grid.minor = element_blank()
  )

  # Title and caption are positioned relative to the whole plot.
  annotations <- theme(
    plot.title = element_text(face = "bold"),
    plot.title.position = "plot",
    plot.caption = element_text(size = 8, face = "italic"),
    plot.caption.position = "plot"
  )

  # Small legend text and keys, with the legend justified at c(1, 0).
  legend_style <- theme(
    legend.background = element_rect(),
    legend.justification = c(1, 0),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.key.size = unit(0.25, "cm")
  )

  theme_minimal() + surface + annotations + legend_style
}

Texas housing data

Information about the housing market in Texas

# Load the txhousing dataset that ships with ggplot2 into the workspace.
data("txhousing", package = "ggplot2")
# Show the distinct city names present in the data.
unique(txhousing[["city"]])
##  [1] "Abilene"               "Amarillo"              "Arlington"            
##  [4] "Austin"                "Bay Area"              "Beaumont"             
##  [7] "Brazoria County"       "Brownsville"           "Bryan-College Station"
## [10] "Collin County"         "Corpus Christi"        "Dallas"               
## [13] "Denton County"         "El Paso"               "Fort Bend"            
## [16] "Fort Worth"            "Galveston"             "Garland"              
## [19] "Harlingen"             "Houston"               "Irving"               
## [22] "Kerrville"             "Killeen-Fort Hood"     "Laredo"               
## [25] "Longview-Marshall"     "Lubbock"               "Lufkin"               
## [28] "McAllen"               "Midland"               "Montgomery County"    
## [31] "Nacogdoches"           "NE Tarrant County"     "Odessa"               
## [34] "Paris"                 "Port Arthur"           "San Angelo"           
## [37] "San Antonio"           "San Marcos"            "Sherman-Denison"      
## [40] "South Padre Island"    "Temple-Belton"         "Texarkana"            
## [43] "Tyler"                 "Victoria"              "Waco"                 
## [46] "Wichita Falls"

This dataset contains lots of information

We can use this dataset to answer several different kinds of questions.

Three variables are used in the plots below: sales, volume, and median (the median sale price).

To begin, we’ll make a plot showing the evolution of sales over time.

library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(RColorBrewer)

# First-of-month Date for every row, built from the year and month columns.
txhousing$date <- as.Date(with(txhousing, paste(year, month, "01", sep = "-")), "%Y-%m-%d")

# Cities to highlight, and one Dark2 colour per city keyed by city name.
cities_of_interest <- c("Houston", "El Paso", "Austin", "San Antonio")
colors <- brewer.pal(n = length(cities_of_interest), name = "Dark2")
names(colors) <- cities_of_interest

# One line per city. Only the named cities get a palette colour; every other
# city has no entry in `colors` and is drawn with `na.value`. The grey is set
# explicitly here instead of relying on a stray unnamed entry in `values`.
plot <- ggplot(txhousing, aes(x = date, y = volume / 1e9, group = city)) +
  geom_line(aes(color = city), linewidth = 0.6) +
  scale_color_manual(values = colors, na.value = "grey") +
  scale_y_continuous(breaks = seq(0, 2.5, 0.5), labels = seq(0, 2.5, 0.5)) +
  # Extend the x-axis 800 days past the last observation so the city labels
  # added below have room to the right of the lines.
  scale_x_date(limits = c(min(txhousing$date), max(txhousing$date) + 800),
               expand = c(0.01, 0.01)) +
  labs(title = "Time Series of Housing Sales Volume in Texas Cities",
       x = "Year",
       y = "Sales Volume (USD billions)",
       color = "City") +
  # Line keys are sized by `linewidth`, matching the geom_line() layers.
  guides(color = guide_legend(override.aes = list(linewidth = 2)))

# Redraw the highlighted cities on top so the grey lines do not cover them.
plot <- plot + geom_line(data = filter(txhousing, city %in% cities_of_interest),
                         aes(color = city))

# Label anchor for each highlighted city: the volume at its latest date with a
# recorded volume, chosen by date rather than by row order.
last_points <- txhousing %>%
  filter(city %in% cities_of_interest, !is.na(volume)) %>%
  group_by(city) %>%
  summarize(volume = volume[which.max(date)] / 1e9, date = max(date))

# Direct labels replace the legend, so the legend is hidden.
plot + geom_text(data = last_points, aes(label = city, x = date, y = volume, color = city),
                 size = 3, hjust = -0.1) +
  theme_minimal() +
  theme(legend.position = "none",
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        axis.text.y = element_text(size = 8))

- The aesthetic choices include a distinct color scheme for key cities to stand out against a neutral grey background for others, moderate line sizing for clarity, and scaled y-axis labels for readability.

Subsetting and faceting

Next, let’s look more closely at the 4 cities of interest that you selected.

  • create a faceted plot that shows each city on a different plot
  • show sales, volume, and median vs. time
  • use aesthetics wisely so that the information is clear and consistent across all plots
# Keep only the highlighted cities and rescale volume and median so that all
# three metrics can share one axis (the divisors match the plot subtitles).
cities_data <- txhousing %>%
  filter(city %in% cities_of_interest) %>%
  mutate(volume_scaled = volume / 1e6,
         median_scaled = median / 100)

# Largest value across the three plotted metrics, ignoring missing values.
# NOTE(review): not referenced by the plots in this section.
max_value <- max(cities_data$sales, cities_data$volume_scaled, cities_data$median_scaled, na.rm = TRUE)

# Long format: one row per city, date and metric.
cities_long <- cities_data %>%
  select(city, date, sales, volume_scaled, median_scaled) %>%
  pivot_longer(cols = c(sales, volume_scaled, median_scaled),
               names_to = "metric", values_to = "value")

# Legend labels keyed by metric name, so each label stays attached to the
# right series regardless of level order.
metric_labels <- c(median_scaled = "Median", sales = "Sales", volume_scaled = "Volume")

# One facet per city, each with its own y-axis range.
plot3 <- ggplot(cities_long, aes(x = date, y = value, color = metric)) +
  geom_line() +
  facet_wrap(~ city, scales = "free_y") +
  scale_color_viridis_d(direction = 1, option = "D", name = "Metric", labels = metric_labels) +
  labs(title = "Sales Metrics Over Time for Selected Texas Cities",
       subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
       x = "Year",
       y = "Values") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.title.position = "plot",
        axis.text = element_text(size = 8))

print(plot3)

# Same data on a shared log10 y-axis, so cities of different size are comparable.
plot2 <- ggplot(cities_long, aes(x = date, y = value, color = metric)) +
  geom_line() +
  facet_wrap(~ city) +
  scale_color_viridis_d(direction = 1, option = "D", name = "Metric", labels = metric_labels) +
  scale_y_log10(labels = label_comma()) +
  labs(title = "Sales Metrics Over Time for Selected Texas Cities",
       subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
       x = "Year",
       y = "Log-Scaled Values",
       caption = "Y-axis values are log-scaled.") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.caption = element_text(face = "italic", size = 8),
        plot.title.position = "plot",
        axis.text = element_text(size = 8))

print(plot2)

Aesthetic choices for both plots:

Colors distinguish the metrics consistently across both plots. The first plot uses independent y-axes to focus on individual city trends, while the second uses a log scale to manage wide-ranging values. Subtitles describe the data transformations.

City choice:

No special reason; these cities were familiar to me and also provided diverse market insights.

Housing sales stats comparison across both plots:

Houston’s market is notably larger and more volatile. El Paso’s market is smaller with less fluctuation. Austin and San Antonio show growth, with Austin’s being more pronounced.

Conclusions from both plots:

The log scale plot shows relative trends and magnitudes clearly. The independent y-axes plot highlights individual city market behaviors. Each city’s housing market has unique characteristics and trends.

Animated plots

Next, let’s use animation to add a layer to our plots.

# Add a `year` column holding 1 January of each observation's year as a Date,
# derived in a single step from `date`.
cities_long <- cities_long %>%
  mutate(year = as.Date(paste0(format(as.Date(date), "%Y"), "-01-01")))

# Animated Plot 3
# Free-y faceted plot whose lines are revealed progressively along `date`.
animated_plot3 <- ggplot(cities_long, aes(x = date, y = value, color = metric)) +
  geom_line(linewidth = 1.1) +
  facet_wrap(~ city, scales = "free_y") +
  # Labels are keyed by metric name so they cannot drift out of order.
  scale_color_viridis_d(direction = 1, option = "D", name = "Metric",
                        labels = c(median_scaled = "Median",
                                   sales = "Sales",
                                   volume_scaled = "Volume")) +
  labs(title = "Sales Metrics Over Time for Selected Texas Cities",
       subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
       x = "Year",
       y = "Values") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.title.position = "plot",
        axis.text = element_text(size = 8)) +
  transition_reveal(along = date)
Sales Metrics
Sales Metrics
# Animated Plot 2
# Shared log10 y-axis faceted plot whose lines are revealed progressively
# along `date`.
animated_plot2 <- ggplot(cities_long, aes(x = date, y = value, color = metric)) +
  geom_line(linewidth = 1.1) +
  facet_wrap(~ city) +
  # Labels are keyed by metric name so they cannot drift out of order.
  scale_color_viridis_d(direction = 1, option = "D", name = "Metric",
                        labels = c(median_scaled = "Median",
                                   sales = "Sales",
                                   volume_scaled = "Volume")) +
  scale_y_log10(labels = scales::label_comma()) +
  labs(title = "Sales Metrics Over Time for Selected Texas Cities",
       subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
       x = "Year",
       y = "Log-Scaled Values",
       caption = "Y-axis values are log-scaled.") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.caption = element_text(face = "italic", size = 8),
        plot.title.position = "plot",
        axis.text = element_text(size = 8)) +
  transition_reveal(along = date)
Log Scaled
Log Scaled

Animation adds a temporal dimension to the data, highlighting trends and changes over time in an engaging way.

________________________________________________________________________________

Interplay of Environmental Factors and Ozone Levels

Please choose a dataset

Make at least 3 different visualizations about the data

library(patchwork)
data("airquality")

# Pieces shared by the panels below.
faded_points <- geom_point(alpha = 0.5)
small_titles <- theme(title = element_text(size = 10))
# Straight-line (lm) trend without a confidence band, in the given colour.
lm_trend <- function(line_color) {
  geom_smooth(method = "lm", se = FALSE, color = line_color)
}

# Plot 1: Ozone vs. Temperature
p1 <- ggplot(airquality, aes(x = Temp, y = Ozone)) +
  faded_points +
  lm_trend("red") +
  labs(title = "Ozone vs. Temperature", x = "Temperature (degrees F)") +
  small_titles

# Plot 2: Ozone vs. Wind Speed
p2 <- ggplot(airquality, aes(x = Wind, y = Ozone)) +
  faded_points +
  lm_trend("#35b779") +
  labs(title = "Ozone vs. Wind Speed", x = "Wind (mph)") +
  small_titles

# Plot 3: distribution of Ozone within each month
p3 <- ggplot(airquality, aes(x = factor(Month), y = Ozone)) +
  geom_boxplot() +
  labs(title = "Monthly Ozone Levels", x = "Month") +
  small_titles

# Plot 4: Ozone vs. Solar Radiation
p4 <- ggplot(airquality, aes(x = Solar.R, y = Ozone)) +
  faded_points +
  lm_trend("#fde725") +
  labs(title = "Ozone vs. Solar Radiation", x = "Solar Radiation (Langley)") +
  small_titles

# Two-by-two layout: scatter plots of temperature and wind on top, the monthly
# boxplot and solar radiation underneath, with a shared title and subtitle.
top_row <- p1 | p2
bottom_row <- p3 | p4
combined_plots <- top_row / bottom_row +
  plot_annotation(
    title = "Interplay of Environmental Factors and Ozone Levels",
    subtitle = "Temperature and solar radiation elevate ozone levels, while wind reduces them.",
    theme = theme(
      plot.title = element_text(size = 14, face = "bold"),
      plot.subtitle = element_text(size = 10)
    )
  )

combined_plots
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Add a calendar Date (year 1973) from Month and Day, then drop every row
# that contains a missing value.
airquality <- airquality %>%
  mutate(Date = make_date(1973, Month, Day)) %>%
  na.omit()

# Display label, including units, for each measured variable.
names_with_units <- c(Ozone = "Ozone (ppb)", Solar.R = "Solar Radiation (lang)",
                      Wind = "Wind (mph)", Temp = "Temperature (degrees F)")

# Long format: one row per date and variable, with the variable stored as a
# factor whose levels follow the order of `names_with_units` and carry the
# unit labels.
airquality_long <- airquality %>%
  select(Date, Ozone, Solar.R, Wind, Temp) %>%
  pivot_longer(cols = -Date, names_to = "variable", values_to = "value") %>%
  mutate(variable = factor(variable,
                           levels = names(names_with_units),
                           labels = names_with_units))

# Line colour for each labelled variable, in the same order as the labels.
colors <- setNames(c("#440154", "#fde725", "#35b779", "red"),
                   unname(names_with_units))

# One stacked panel per variable, each with its own y-axis, revealed
# progressively along Date. Panel strips name the variables, so the legend
# is hidden.
animated_plot <- ggplot(airquality_long, aes(x = Date, y = value, color = variable)) +
  geom_line(lwd = 1.1) +
  scale_color_manual(values = colors) +
  facet_wrap(~ variable, scales = "free_y", ncol = 1) +
  labs(title = "Environmental Metrics Over Time", x = "Date", y = "Values") +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 8),
    strip.text.x = element_text(size = 10)
  ) +
  transition_reveal(Date)

Our story about air quality shows how the environment affects ozone levels through the following plot points:

An animated plot, “Environmental Metrics Over Time,” brings motion to the story, dynamically showcasing how these elements fluctuate day-to-day and month-to-month.

This collection of images and animations essentially narrates the succinct tale of how temperature, wind, and sun interact with ozone throughout the year.