#' Custom ggplot2 theme layered on top of `theme_minimal()`.
#'
#' Takes no arguments.
#'
#' @return The result of adding the `theme()` overrides below to
#'   `theme_minimal()`; add it to a plot with `+`.
first_custom <- function() {
  overrides <- theme(
    # Axes, backgrounds and grid lines
    axis.line = element_line(color = "gray0"),
    plot.background = element_rect(fill = "grey95"),
    panel.grid.major = element_line(linewidth = 0.5, color = "gray80"),
    # Minor grid lines are switched off; a light-grey alternative was:
    # panel.grid.minor = element_line(color = "gray85"),
    panel.grid.minor = element_blank(),
    plot.margin = margin(15, 15, 15, 15),
    # Title and caption are positioned relative to the whole plot
    plot.title = element_text(face = "bold"),
    plot.title.position = "plot",
    plot.caption = element_text(size = 8, face = "italic"),
    plot.caption.position = "plot",
    # Compact legend
    legend.background = element_rect(),
    legend.justification = c(1, 0),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.key.size = unit(0.25, "cm")
  )

  theme_minimal() + overrides
}
Information about the housing market in Texas
# The txhousing data set comes from the ggplot2 package.
data(txhousing, package = "ggplot2")
# List the distinct city names present in the data.
unique(txhousing$city)
## [1] "Abilene" "Amarillo" "Arlington"
## [4] "Austin" "Bay Area" "Beaumont"
## [7] "Brazoria County" "Brownsville" "Bryan-College Station"
## [10] "Collin County" "Corpus Christi" "Dallas"
## [13] "Denton County" "El Paso" "Fort Bend"
## [16] "Fort Worth" "Galveston" "Garland"
## [19] "Harlingen" "Houston" "Irving"
## [22] "Kerrville" "Killeen-Fort Hood" "Laredo"
## [25] "Longview-Marshall" "Lubbock" "Lufkin"
## [28] "McAllen" "Midland" "Montgomery County"
## [31] "Nacogdoches" "NE Tarrant County" "Odessa"
## [34] "Paris" "Port Arthur" "San Angelo"
## [37] "San Antonio" "San Marcos" "Sherman-Denison"
## [40] "South Padre Island" "Temple-Belton" "Texarkana"
## [43] "Tyler" "Victoria" "Waco"
## [46] "Wichita Falls"
This dataset contains lots of information
We can use this dataset to answer several different kinds of questions;
Three variables,
To begin, we’ll make a plot showing the evolution of sales over time.
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(RColorBrewer)
# Build a Date column from each row's year and month, with the day fixed
# to the first of the month.
txhousing$date <- with(
  txhousing,
  as.Date(paste(year, month, "01", sep = "-"), format = "%Y-%m-%d")
)

# Cities to highlight, each paired by name with one "Dark2" palette colour.
cities_of_interest <- c("Houston", "El Paso", "Austin", "San Antonio")
colors <- setNames(
  brewer.pal(n = length(cities_of_interest), name = "Dark2"),
  cities_of_interest
)
# Time series of sales volume (divided by 1e9) with one line per city.
# Cities named in `colors` get their own colour; every other city falls
# back to grey.
plot <- ggplot(txhousing, aes(x = date, y = volume / 1e9, group = city)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated in current ggplot2.
  geom_line(aes(color = city), linewidth = 0.6) +
  # A named `values` vector is matched to cities by name, so an unnamed
  # "grey" entry would never be applied. Cities without an entry in
  # `colors` take `na.value` instead, so the grey is stated there.
  scale_color_manual(values = colors, na.value = "grey") +
  scale_y_continuous(breaks = seq(0, 2.5, 0.5), labels = seq(0, 2.5, 0.5)) +
  # The upper x limit extends 800 days past the last observation --
  # presumably to leave room for labels drawn after the line ends.
  scale_x_date(
    limits = c(min(txhousing$date), max(txhousing$date) + 800),
    expand = c(0.01, 0.01)
  ) +
  labs(
    title = "Time Series of Housing Sales Volume in Texas Cities",
    x = "Year",
    y = "Sales Volume (USD billions)",
    color = "City"
  ) +
  # Thicker legend keys; line keys are sized through `linewidth`.
  guides(color = guide_legend(override.aes = list(linewidth = 2)))

# Draw the highlighted cities again as a second layer so that their lines
# sit on top of the grey ones.
plot <- plot +
  geom_line(
    data = filter(txhousing, city %in% cities_of_interest),
    aes(color = city)
  )
# One row per highlighted city: the volume (divided by 1e9) at that city's
# latest date, and the date itself. `order_by = date` ties the value to the
# latest date explicitly instead of relying on the row order of the data.
last_points <- txhousing %>%
  filter(city %in% cities_of_interest) %>%
  group_by(city) %>%
  summarize(
    volume = last(volume / 1e9, order_by = date),
    date = max(date)
  )

# Label each highlighted line at its last point and drop the legend.
plot +
  geom_text(
    data = last_points,
    aes(label = city, x = date, y = volume, color = city),
    size = 3,
    hjust = -0.1
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.text.y = element_text(size = 8)
  )
- The aesthetic choices include a distinct color for each key city, so that
it stands out against the neutral grey lines of the other cities, a moderate
line width for clarity, and rescaled y-axis labels for readability.
This is not a typical “spaghetti chart” because it strategically emphasizes only a few cities, preventing it from becoming too cluttered.
The visualization’s effectiveness is somewhat limited because while it highlights certain cities well, the remaining cities’ lines are indistinguishable from one another, potentially obscuring detailed comparisons among them.
Next, let’s look more closely at the 4 cities of interest that you selected.
# Keep the four selected cities and rescale volume and median price so the
# three metrics can share one axis.
cities_data <- txhousing %>%
  filter(city %in% c("Houston", "El Paso", "Austin", "San Antonio")) %>%
  mutate(
    volume_scaled = volume / 1e6,
    median_scaled = median / 100
  )

# Largest value across the three metrics. Not referenced again in the
# visible code; kept in case later code relies on it.
max_value <- max(
  cities_data$sales,
  cities_data$volume_scaled,
  cities_data$median_scaled,
  na.rm = TRUE
)

# Long format: one row per city, date and metric. pivot_longer() replaces
# the superseded gather(); the resulting columns are still named `metric`
# and `value`.
cities_long <- cities_data %>%
  select(city, date, sales, volume_scaled, median_scaled) %>%
  pivot_longer(
    cols = c(sales, volume_scaled, median_scaled),
    names_to = "metric",
    values_to = "value"
  )
# Static plot: one panel per city, each with its own y-axis range, and one
# coloured line per metric.
plot3 <- ggplot(cities_long, aes(x = date, y = value, color = metric))

# Lines and facets
plot3 <- plot3 +
  geom_line() +
  facet_wrap(~ city, scales = "free_y")

# Colour scale and text
plot3 <- plot3 +
  scale_color_viridis_d(
    direction = 1,
    option = "D",
    name = "Metric",
    labels = c("Median", "Sales", "Volume")
  ) +
  labs(
    title = "Sales Metrics Over Time for Selected Texas Cities",
    subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
    x = "Year",
    y = "Values",
    color = "Metric"
  )

# Theme
plot3 <- plot3 +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title.position = "plot",
    axis.text = element_text(size = 8)
  )

print(plot3)
# Static plot: one panel per city sharing a single log10 y-axis, and one
# coloured line per metric.
plot2 <- ggplot(cities_long, aes(x = date, y = value, color = metric)) +
  geom_line() +
  facet_wrap(~ city) +
  scale_color_viridis_d(
    direction = 1,
    option = "D",
    name = "Metric",
    labels = c("Median", "Sales", "Volume")
  ) +
  # scale_y_log10() is the shorthand for a continuous y scale with a log10
  # transformation. It avoids passing the transformation through the `trans`
  # argument, which newer ggplot2 releases deprecate in favour of
  # `transform`.
  scale_y_log10(labels = label_comma()) +
  labs(
    title = "Sales Metrics Over Time for Selected Texas Cities",
    subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
    x = "Year",
    y = "Log-Scaled Values",
    color = "Metric",
    caption = "Y-axis values are log-scaled."
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(face = "italic", size = 8),
    plot.title.position = "plot",
    axis.text = element_text(size = 8)
  )

print(plot2)
Aesthetic choices for both plots:
Colors distinguish the metrics consistently across both plots. The first plot uses independent y-axes to focus on individual city trends, while the second uses a log scale to manage the wide range of values. Subtitles describe the data transformations.
City choice:
No special reason; these cities were familiar to me and also provided diverse market insights.
Housing sales stats comparison across both plots:
Houston’s market is notably larger and more volatile. El Paso’s market is smaller with less fluctuation. Austin and San Antonio show growth, with Austin’s being more pronounced.
Conclusions from both plots:
The log scale plot shows relative trends and magnitudes clearly. The independent y-axes plot highlights individual city market behaviors. Each city’s housing market has unique characteristics and trends.
Next, let’s use animation to add a layer to our plots.
# Add a `year` column holding January 1st of each observation's year.
cities_long <- cities_long %>%
  mutate(year = as.Date(paste0(format(as.Date(date), "%Y"), "-01-01")))
# Animated Plot 3
# One panel per city with its own y-axis range; the lines are revealed
# progressively along `date`.
animated_plot3 <- ggplot(cities_long, aes(x = date, y = value, color = metric)) +
  # `linewidth` is the ggplot2 name for line thickness (also used for the
  # grid lines in first_custom()); `lwd` is the base-graphics alias.
  geom_line(linewidth = 1.1) +
  facet_wrap(~ city, scales = "free_y") +
  scale_color_viridis_d(
    direction = 1,
    option = "D",
    name = "Metric",
    labels = c("Median", "Sales", "Volume")
  ) +
  labs(
    title = "Sales Metrics Over Time for Selected Texas Cities",
    subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
    x = "Year",
    y = "Values",
    color = "Metric"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title.position = "plot",
    axis.text = element_text(size = 8)
  ) +
  transition_reveal(along = date)
# Animated Plot 2
# One panel per city sharing a single log10 y-axis; the lines are revealed
# progressively along `date`.
animated_plot2 <- ggplot(cities_long, aes(x = date, y = value, color = metric)) +
  # `linewidth` is the ggplot2 name for line thickness; `lwd` is the
  # base-graphics alias.
  geom_line(linewidth = 1.1) +
  facet_wrap(~ city) +
  scale_color_viridis_d(
    direction = 1,
    option = "D",
    name = "Metric",
    labels = c("Median", "Sales", "Volume")
  ) +
  # scale_y_log10() is the shorthand for a continuous y scale with a log10
  # transformation. It avoids passing the transformation through the `trans`
  # argument, which newer ggplot2 releases deprecate in favour of
  # `transform`.
  scale_y_log10(labels = scales::label_comma()) +
  labs(
    title = "Sales Metrics Over Time for Selected Texas Cities",
    subtitle = "Volume is scaled down by a factor of 1e6 and median sales price by 100.",
    x = "Year",
    y = "Log-Scaled Values",
    color = "Metric",
    caption = "Y-axis values are log-scaled."
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(face = "italic", size = 8),
    plot.title.position = "plot",
    axis.text = element_text(size = 8)
  ) +
  transition_reveal(along = date)
Animation adds a temporal dimension to the data, highlighting trends and changes over time in an engaging way.
________________________________________________________________________________
Please choose a dataset.
Make at least 3 different visualizations of the data.
library(patchwork)
# Load the airquality data set.
data(airquality)

# Plot 1: Ozone vs. Temperature, with a straight-line fit in red.
p1 <- ggplot(airquality, aes(x = Temp, y = Ozone))
p1 <- p1 +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "red")
p1 <- p1 +
  theme(title = element_text(size = 10)) +
  labs(title = "Ozone vs. Temperature", x = "Temperature (degrees F)")

# Plot 2: Ozone vs. Wind Speed, with a straight-line fit in green.
p2 <- ggplot(airquality, aes(x = Wind, y = Ozone))
p2 <- p2 +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "#35b779")
p2 <- p2 +
  theme(title = element_text(size = 10)) +
  labs(title = "Ozone vs. Wind Speed", x = "Wind (mph)")

# Plot 3: Ozone by month, one box per month.
p3 <- ggplot(airquality, aes(x = factor(Month), y = Ozone))
p3 <- p3 + geom_boxplot()
p3 <- p3 +
  theme(title = element_text(size = 10)) +
  labs(title = "Monthly Ozone Levels", x = "Month")

# Plot 4: Ozone vs. Solar Radiation, with a straight-line fit in yellow.
p4 <- ggplot(airquality, aes(x = Solar.R, y = Ozone))
p4 <- p4 +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "#fde725")
p4 <- p4 +
  theme(title = element_text(size = 10)) +
  labs(title = "Ozone vs. Solar Radiation", x = "Solar Radiation (Langley)")
# Arrange the four panels in a 2 x 2 grid (p1 and p2 on top, p3 and p4
# below) and add an overall title and subtitle.
combined_plots <- ((p1 | p2) / (p3 | p4)) +
  plot_annotation(
    title = "Interplay of Environmental Factors and Ozone Levels",
    subtitle = "Temperature and solar radiation elevate ozone levels, while wind reduces them.",
    theme = theme(
      plot.title = element_text(size = 14, face = "bold"),
      plot.subtitle = element_text(size = 10)
    )
  )

# Render the combined figure.
print(combined_plots)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Add a Date column (year fixed to 1973) and drop every row that contains
# an NA value.
airquality <- airquality %>%
  mutate(Date = make_date(1973, Month, Day)) %>%
  na.omit()

# Display label, including units, for each measured variable.
names_with_units <- c(
  Ozone = "Ozone (ppb)",
  Solar.R = "Solar Radiation (lang)",
  Wind = "Wind (mph)",
  Temp = "Temperature (degrees F)"
)

# Long format: one row per date and variable, with `variable` stored as a
# factor whose levels follow the order of `names_with_units` and whose
# labels carry the units.
airquality_long <- airquality %>%
  select(Date, Ozone, Solar.R, Wind, Temp) %>%
  pivot_longer(cols = -Date, names_to = "variable", values_to = "value") %>%
  mutate(
    variable = factor(
      variable,
      levels = names(names_with_units),
      labels = names_with_units
    )
  )
# One colour per variable label, keyed by the factor labels of
# `airquality_long$variable`.
colors <- c(
  "Ozone (ppb)" = "#440154",
  "Solar Radiation (lang)" = "#fde725",
  "Wind (mph)" = "#35b779",
  "Temperature (degrees F)" = "red"
)

# Animated plot: one stacked panel per variable, each with its own y-axis
# range; the lines are revealed progressively along `Date`.
animated_plot <- ggplot(airquality_long, aes(x = Date, y = value, color = variable)) +
  # `linewidth` is the ggplot2 name for line thickness; `lwd` is the
  # base-graphics alias.
  geom_line(linewidth = 1.1) +
  scale_color_manual(values = colors) +
  facet_wrap(~ variable, scales = "free_y", ncol = 1) +
  labs(
    title = "Environmental Metrics Over Time",
    x = "Date",
    y = "Values"
  ) +
  theme_minimal() +
  # The facet strips already name each variable, so the legend is hidden.
  theme(
    legend.position = "none",
    axis.text = element_text(size = 8),
    strip.text.x = element_text(size = 10)
  ) +
  transition_reveal(along = Date)
Our story about air quality shows how the environment affects ozone levels through the following plot points:
An animated plot, “Environmental Metrics Over Time,” brings motion to the story, dynamically showcasing how these elements fluctuate day-to-day and month-to-month.
This collection of images and animations essentially narrates the succinct tale of how temperature, wind, and sun interact with ozone throughout the year.