first_custom <- function() {
  theme_minimal() +
    theme(
      axis.line = element_line(color = "gray0"),
      plot.background  = element_rect(fill = "grey95"),
      panel.grid.major = element_line(linewidth = 0.5, color = "gray80"),
      panel.grid.minor = element_blank(),
      #panel.grid.minor = element_line(color = "gray85"),
      plot.margin = margin(15, 15, 15 ,15),
      plot.title = element_text(face = "bold"),
      plot.title.position = "plot",
      plot.caption = element_text(size = 8, face = "italic"),
      plot.caption.position = "plot",
      legend.background = element_rect(),
      legend.justification = (c(1,0)),
      legend.text = element_text(size = 8),
      legend.title = element_text(size = 8),
      legend.key.size = unit(0.25, "cm")
    )
}

Tidy Tuesday Description

Taken from the GitHub repo

The TidyTuesday Project is a weekly data project aimed at the R ecosystem. As this project was born out of the R4DS Online Learning Community and the R for Data Science textbook, an emphasis was placed on understanding how to summarize and arrange data to make meaningful charts with ggplot2, tidyr, dplyr, and other tools in the tidyverse ecosystem, but other packages are more than welcome.

Every week they post a raw dataset, a chart or article related to that dataset, and ask participants to explore the data. While the dataset will be “tamed”, it will not always be tidy! As such you might need to apply various R for Data Science techniques to wrangle the data into a true tidy format. For this lab, we won’t require you to do much cleaning. The goal of TidyTuesday is to apply your R skills, get feedback, explore other’s work, and connect with the greater #RStats community!

TidyTuesday is intended to be a safe and supportive environment.


Choosing Data, Exploring/Cleaning, and Tables

The TidyTuesday Repo contains weekly datasets for the last four years. There is a lot of documentation on the repo, please check it out. If you’re looking for some inspiration, #TidyTuesday is a popular hashtag on Twitter and other platforms.

Lets Find Some Data

rainfall <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-07/rainfall.csv')
temperature <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-07/temperature.csv')

temperature <- temperature %>% 
  mutate(year = as.numeric(format(date, "%Y",)),
         month = as.numeric(format(date, "%m")))

temperature$city_name = toupper(temperature$city_name)
rainfall$city_name = toupper(rainfall$city_name)

Rainfall and Temperature data of Australia https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-01-07/readme.md

library(viridis)

# Find the intersection of city names
common_cities <- intersect(temperature$city_name, rainfall$city_name)

# Filter both datasets to keep only the common cities
temperature <- temperature %>%
  filter(city_name %in% common_cities)
rainfall <- rainfall %>%
  filter(city_name %in% common_cities)

temperature <- temperature %>% 
  mutate(year = as.numeric(format(date, "%Y",)),
         month = as.numeric(format(date, "%m")))

temperature$city_name = toupper(temperature$city_name)
rainfall$city_name = toupper(rainfall$city_name)

temp_recent_mean <- temperature %>%
  filter(year >= 2000, year <= 2019) %>%
  mutate(city_name = str_to_title(city_name)) %>%
  filter(temp_type == "max") %>%
  group_by(year, city_name) %>%
  summarise(temperature = mean(temperature, na.rm = TRUE), .groups='drop')

ggplot(temp_recent_mean, aes(fill=city_name, x=year, y=temperature, color=city_name)) +
  geom_line() +
  facet_wrap(~city_name, nrow=3) +
  scale_x_continuous(name="Year", breaks=seq(2000, 2019, 5)) +
  scale_y_continuous(name="Max Temp in Celsius", breaks=seq(14, 30, 2)) +
  ggtitle("Temperature Variation in Each City: 2000-2019") +
  theme(legend.position="none")

rain_recent_mean <- rainfall %>%
  filter(year >= 2000, year <= 2019) %>%
  group_by(year, city_name) %>%
  summarise(mean_rain = mean(rainfall, na.rm=TRUE), .groups='drop') %>%
  arrange(desc(mean_rain))

ggplot(rain_recent_mean, aes(fill=city_name, x=year, y=mean_rain, color=city_name)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(name="Year", breaks=seq(2000, 2019, 5)) +
  scale_y_continuous(name="Average Rain in mm", breaks=seq(1, 8, 1)) +
  facet_wrap(~city_name) +
  ggtitle("Average Rainfall in Cities: 2000-2019") +
  theme(legend.position="none", axis.text.x=element_text(size=6))

## Tables

library(dplyr)
library(knitr)

# Find common cities
common_cities <- intersect(temperature$city_name, rainfall$city_name)

# Filter datasets to keep only common cities and specified years
temp_recent_common <- temperature %>%
  filter(city_name %in% common_cities, year >= 2000, year <= 2019)

rain_recent_common <- rainfall %>%
  filter(city_name %in% common_cities, year >= 2000, year <= 2019)

# Calculate temperature summary statistics
summary_stats_temp <- temp_recent_common %>%
  group_by(city_name, year) %>%
  summarize(
    mean_temp = mean(temperature, na.rm = TRUE),
    median_temp = median(temperature, na.rm = TRUE),
    min_temp = min(temperature, na.rm = TRUE),
    max_temp = max(temperature, na.rm = TRUE),
    .groups = 'drop'
  )

# Calculate rainfall summary statistics
summary_stats_rain <- rain_recent_common %>%
  group_by(city_name, year) %>%
  summarize(
    mean_rain = mean(rainfall, na.rm = TRUE),
    median_rain = median(rainfall, na.rm = TRUE),
    percentile_75_rain = quantile(rainfall, 0.75, na.rm = TRUE),
    total_rain = sum(rainfall, na.rm = TRUE),
    .groups = 'drop'
  )

# Output summary tables
kable(summary_stats_temp, "html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "100%", height = "500px")
city_name year mean_temp median_temp min_temp max_temp
BRISBANE 2000 20.08620 20.80 2.5 37.4
BRISBANE 2001 20.45110 21.60 2.6 36.7
BRISBANE 2002 20.46534 21.10 1.6 36.0
BRISBANE 2003 20.17521 21.00 1.8 35.1
BRISBANE 2004 20.63497 21.85 2.5 40.2
BRISBANE 2005 20.86644 21.60 4.1 35.4
BRISBANE 2006 20.46356 21.70 3.0 32.2
BRISBANE 2007 20.63767 21.40 -0.1 34.2
BRISBANE 2008 20.07910 20.90 2.6 39.8
BRISBANE 2009 20.90932 21.70 3.4 34.8
BRISBANE 2010 20.53000 21.15 5.0 32.1
BRISBANE 2011 20.06027 21.15 3.2 33.5
BRISBANE 2012 20.23844 21.00 3.7 35.9
BRISBANE 2013 20.69411 21.35 4.1 32.4
BRISBANE 2014 20.95918 21.80 2.6 34.3
BRISBANE 2015 20.65151 21.60 4.5 36.3
BRISBANE 2016 21.18470 21.65 4.6 36.3
BRISBANE 2017 21.23452 22.30 4.3 34.9
BRISBANE 2018 20.77205 21.70 3.3 38.1
BRISBANE 2019 23.43079 23.50 5.2 34.0
CANBERRA 2000 13.05683 13.10 -7.9 35.1
CANBERRA 2001 13.33278 13.90 -6.8 39.9
CANBERRA 2002 13.56096 13.35 -7.0 36.9
CANBERRA 2003 13.52452 13.40 -5.7 39.8
CANBERRA 2004 13.76325 13.20 -8.3 38.7
CANBERRA 2005 13.88137 13.55 -5.4 39.4
CANBERRA 2006 13.97466 14.05 -7.4 39.9
CANBERRA 2007 14.31397 13.60 -6.1 40.5
CANBERRA 2008 13.24727 13.00 -6.7 35.8
CANBERRA 2009 14.18411 13.20 -7.5 40.0
CANBERRA 2010 13.54262 13.70 -6.5 39.1
CANBERRA 2011 13.30836 14.00 -8.0 37.5
CANBERRA 2012 12.84221 13.20 -6.8 35.1
CANBERRA 2013 13.97411 13.70 -6.0 41.6
CANBERRA 2014 14.08233 13.70 -7.6 40.2
CANBERRA 2015 13.74609 13.50 -7.0 37.0
CANBERRA 2016 14.39098 14.00 -5.5 39.3
CANBERRA 2017 14.02428 13.80 -8.7 41.6
CANBERRA 2018 14.43000 13.70 -7.4 40.6
CANBERRA 2019 18.18775 17.40 -2.7 41.6
MELBOURNE 2000 15.94877 15.10 1.8 38.9
MELBOURNE 2001 15.65863 14.75 3.6 40.9
MELBOURNE 2002 15.66014 14.90 1.2 36.7
MELBOURNE 2003 15.54137 14.90 1.4 43.9
MELBOURNE 2004 15.34071 14.40 1.4 39.7
MELBOURNE 2005 16.05630 15.20 1.9 42.7
MELBOURNE 2006 15.43247 14.70 2.2 42.2
MELBOURNE 2007 16.53178 15.50 2.0 40.9
MELBOURNE 2008 15.65451 14.70 2.3 41.0
MELBOURNE 2009 16.19822 15.30 1.8 45.9
MELBOURNE 2010 15.93580 15.10 3.0 43.4
MELBOURNE 2011 15.82593 15.20 2.5 39.5
MELBOURNE 2012 15.79302 14.80 2.1 39.7
MELBOURNE 2013 16.27411 15.60 1.3 40.9
MELBOURNE 2014 16.47233 15.45 1.2 43.4
MELBOURNE 2015 15.91562 14.70 0.6 41.2
MELBOURNE 2016 16.17637 15.50 3.0 42.2
MELBOURNE 2017 16.26849 15.40 0.8 37.8
MELBOURNE 2018 16.22466 15.30 0.8 41.7
MELBOURNE 2019 18.98510 18.05 6.2 42.8
PERTH 2000 18.75533 18.50 0.4 41.1
PERTH 2001 18.07329 18.00 0.5 40.1
PERTH 2002 18.62493 18.40 1.0 43.2
PERTH 2003 18.81836 18.20 1.2 43.8
PERTH 2004 18.59385 18.20 0.1 42.1
PERTH 2005 17.92027 17.65 0.8 42.6
PERTH 2006 18.58904 18.50 -1.3 40.9
PERTH 2007 18.41027 18.20 2.5 44.5
PERTH 2008 18.57542 18.50 1.3 43.4
PERTH 2009 18.89589 17.80 0.9 43.0
PERTH 2010 18.91205 18.50 -1.0 43.2
PERTH 2011 19.79918 19.25 0.2 41.8
PERTH 2012 19.14686 18.85 -0.7 42.2
PERTH 2013 19.34315 18.95 -0.3 41.1
PERTH 2014 19.21041 18.70 1.0 43.5
PERTH 2015 19.43534 18.55 0.1 44.2
PERTH 2016 18.20505 17.70 0.9 43.0
PERTH 2017 18.95603 18.10 1.0 43.3
PERTH 2018 18.77174 18.20 2.5 40.9
PERTH 2019 21.34636 21.15 1.5 42.9
SYDNEY 2000 18.66585 19.00 5.6 38.6
SYDNEY 2001 18.92712 19.15 5.8 35.8
SYDNEY 2002 18.85995 18.90 4.3 38.5
SYDNEY 2003 18.61479 19.10 4.9 39.6
SYDNEY 2004 19.08142 19.40 5.9 39.9
SYDNEY 2005 19.09877 19.40 5.1 39.2
SYDNEY 2006 19.05603 19.30 5.8 45.0
SYDNEY 2007 19.09699 19.40 3.7 39.5
SYDNEY 2008 18.34891 18.70 5.3 35.5
SYDNEY 2009 19.18459 19.60 5.9 41.7
SYDNEY 2010 18.99712 19.00 4.3 42.1
SYDNEY 2011 18.66575 18.80 5.5 41.5
SYDNEY 2012 18.51421 18.90 5.5 34.2
SYDNEY 2013 19.37041 19.90 7.0 45.8
SYDNEY 2014 19.27912 19.55 5.5 36.5
SYDNEY 2015 19.03712 19.30 5.0 40.9
SYDNEY 2016 19.63251 19.85 5.4 39.2
SYDNEY 2017 19.44507 19.85 5.4 39.4
SYDNEY 2018 19.14033 19.50 4.6 43.4
SYDNEY 2019 22.03046 22.15 7.8 39.6
kable(summary_stats_rain, "html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "100%", height = "500px")
city_name year mean_rain median_rain percentile_75_rain total_rain
BRISBANE 2000 1.9666667 0.0 0.600 743.4
BRISBANE 2001 2.9766162 0.0 0.400 2164.0
BRISBANE 2002 1.8958621 0.0 0.000 1374.5
BRISBANE 2003 2.3370474 0.0 0.200 1678.0
BRISBANE 2004 3.2303797 0.0 0.000 2296.8
BRISBANE 2005 2.0973050 0.0 0.200 1478.6
BRISBANE 2006 2.1753463 0.0 0.200 1570.6
BRISBANE 2007 1.9217391 0.0 0.400 1370.2
BRISBANE 2008 5.0038544 0.0 4.000 2336.8
BRISBANE 2009 4.6768018 0.0 2.200 2076.5
BRISBANE 2010 7.4293850 0.2 4.800 3261.5
BRISBANE 2011 4.7905312 0.0 3.600 2074.3
BRISBANE 2012 5.0582927 0.0 2.350 2073.9
BRISBANE 2013 4.8500000 0.0 2.200 2027.3
BRISBANE 2014 2.4940129 0.0 0.200 1541.3
BRISBANE 2015 5.1922727 0.0 0.400 2284.6
BRISBANE 2016 2.0925620 0.0 0.400 759.6
BRISBANE 2017 2.7191011 0.0 0.200 968.0
BRISBANE 2018 2.4689655 0.0 0.850 859.2
BRISBANE 2019 1.8033058 0.0 0.200 654.6
CANBERRA 2008 2.2940000 0.0 0.050 229.4
CANBERRA 2009 1.1819178 0.0 0.200 431.4
CANBERRA 2010 2.6290411 0.0 0.400 959.6
CANBERRA 2011 1.5870879 0.0 0.400 577.7
CANBERRA 2012 1.8950820 0.0 0.200 693.6
CANBERRA 2013 1.4624658 0.0 0.200 533.8
CANBERRA 2014 1.5663912 0.0 0.200 568.6
CANBERRA 2015 1.5113573 0.0 0.200 545.6
CANBERRA 2016 2.1535519 0.0 0.800 788.2
CANBERRA 2017 1.3388430 0.0 0.200 486.0
CANBERRA 2018 1.2967033 0.0 0.000 472.0
CANBERRA 2019 0.9824658 0.0 0.000 358.6
MELBOURNE 2000 1.6349398 0.0 0.600 542.8
MELBOURNE 2001 1.8050147 0.0 1.200 611.9
MELBOURNE 2002 1.2055249 0.0 0.775 436.4
MELBOURNE 2003 1.3473973 0.0 0.800 491.8
MELBOURNE 2004 1.6473829 0.0 0.900 598.0
MELBOURNE 2005 1.5087912 0.0 0.800 549.2
MELBOURNE 2006 1.0264264 0.0 0.400 341.8
MELBOURNE 2007 1.2233533 0.0 0.800 408.6
MELBOURNE 2008 1.1803279 0.0 0.600 432.0
MELBOURNE 2009 1.1369863 0.0 0.600 415.0
MELBOURNE 2010 1.9949580 0.0 1.000 712.2
MELBOURNE 2011 2.3900277 0.0 1.000 862.8
MELBOURNE 2012 1.6983516 0.0 1.600 618.2
MELBOURNE 2013 1.8327824 0.0 1.400 665.3
MELBOURNE 2014 1.3164835 0.0 0.800 479.2
MELBOURNE 2015 1.2922636 0.0 0.600 451.0
MELBOURNE 2016 1.9286932 0.0 1.450 678.9
MELBOURNE 2017 1.7706371 0.0 1.000 639.2
MELBOURNE 2018 1.4876033 0.0 0.600 540.0
MELBOURNE 2019 1.2196133 0.0 1.200 441.5
PERTH 2000 1.9900826 0.0 0.650 722.4
PERTH 2001 1.9284507 0.0 0.000 684.6
PERTH 2002 1.7083102 0.0 0.200 616.7
PERTH 2003 2.1250689 0.0 0.800 771.4
PERTH 2004 1.5684211 0.0 0.400 566.2
PERTH 2005 2.2229917 0.0 0.900 802.5
PERTH 2006 1.2200000 0.0 0.000 445.3
PERTH 2007 1.8549451 0.0 0.825 675.2
PERTH 2008 2.1204420 0.0 0.875 767.6
PERTH 2009 1.5241758 0.0 0.400 554.8
PERTH 2010 1.4082418 0.0 0.000 512.6
PERTH 2011 2.1866667 0.0 0.200 787.2
PERTH 2012 1.7879656 0.0 0.200 624.0
PERTH 2013 2.0713889 0.0 0.425 745.7
PERTH 2014 1.7834254 0.0 0.000 645.6
PERTH 2015 1.5380556 0.0 0.000 553.7
PERTH 2016 2.0519663 0.0 0.400 730.5
PERTH 2017 2.0490028 0.0 0.200 719.2
PERTH 2018 2.1263736 0.0 0.200 774.0
PERTH 2019 1.5082192 0.0 0.000 550.5
SYDNEY 2000 2.2448087 0.0 0.800 821.6
SYDNEY 2001 3.7227397 0.0 1.200 1358.8
SYDNEY 2002 2.3561644 0.0 0.400 860.0
SYDNEY 2003 3.2887671 0.0 1.200 1200.4
SYDNEY 2004 2.7191257 0.0 0.550 995.2
SYDNEY 2005 2.2356164 0.0 0.600 816.0
SYDNEY 2006 2.7232877 0.0 1.000 994.0
SYDNEY 2007 4.1073973 0.0 1.200 1499.2
SYDNEY 2008 2.9579235 0.0 1.800 1082.6
SYDNEY 2009 2.6197260 0.0 0.800 956.2
SYDNEY 2010 3.1878453 0.0 1.800 1154.0
SYDNEY 2011 3.7512329 0.0 2.000 1369.2
SYDNEY 2012 3.3158470 0.0 1.200 1213.6
SYDNEY 2013 3.6832877 0.0 0.600 1344.4
SYDNEY 2014 2.4853186 0.0 1.600 897.2
SYDNEY 2015 3.6663014 0.0 1.400 1338.2
SYDNEY 2016 3.7857923 0.0 0.750 1385.6
SYDNEY 2017 2.9630854 0.0 0.700 1075.6
SYDNEY 2018 2.7478022 0.0 0.800 1000.2
SYDNEY 2019 2.3336986 0.0 0.200 851.8

Insights from Datasets:

  • Upward Temperature Trend: There is a noticeable upward trend in temperatures, particularly in the later years of the dataset, with several cities showing higher average and median temperatures. This is consistent with the global trend of rising temperatures and could be indicative of climate change effects.

  • Seasonal Variability: The wide range between the minimum and maximum temperatures for each city within each year indicates strong seasonal variability. This is expected, but the range also shows the potential for extreme weather events, as seen with some very high maximum temperatures.

  • Rainfall Variability: The rainfall data exhibits considerable yearly variability within cities, with certain years showing peaks that could correspond to specific heavy rainfall events. This suggests that rainfall is not consistent year-to-year and is subject to larger climatic patterns or events.

  • Data Quality: Initially, the median rainfall values being zero indicated potential data issues or many zero-rainfall days. However, the 75th percentile values showed that significant rainfall did occur, indicating the data likely captured actual variations in rainfall.

  • Extreme Values: The dataset contains some extreme temperature values, both hot and cold, which could warrant further investigation to understand if they are outliers, errors, or true weather phenomena.

  • Consistency Across Years: While there is year-to-year variation, the temperature data generally show a consistent pattern over the two-decade span, with the recent years standing out for their higher values.

Time, Date, and Timeseries

Time Conversion

Timeseries is all about a repeating trend. Sometimes you don’t know that there is a recurring trend until you create the data as a timeseries. Here, we’ll go ahead and use ts() on the variables that, in your opinion, it would make sense to apply it to.

The frequency argument inside ts() dictates how many observations should be considered in each cycle of the recurring trend.

  • weekly, monthly, yearly, etc.
library(lubridate)
library(zoo)

rainfall$date <- as.Date(paste(rainfall$year, rainfall$month, rainfall$day, sep = "-"), "%Y-%m-%d")
temperature$date <- as.Date(temperature$date)

rainfall_filtered <- subset(rainfall, year >= 2000 & (year < 2019 | (year == 2019 & as.Date(paste(year, month, day, sep="-")) <= as.Date("2019-05-31"))))
temperature_filtered <- subset(temperature, year >= 2000 & (year < 2019 | (year == 2019 & date <= as.Date("2019-05-31"))))

rainfall_filtered %>% 
  summarise(across(everything(), ~sum(is.na(.))))
## # A tibble: 1 × 12
##   station_code city_name  year month   day rainfall period quality   lat  long
##          <int>     <int> <int> <int> <int>    <int>  <int>   <int> <int> <int>
## 1            0         0     0     0     0     3160  18974    3164     0     0
## # ℹ 2 more variables: station_name <int>, date <int>
temperature_filtered %>% 
  summarise(across(everything(), ~sum(is.na(.))))
## # A tibble: 1 × 7
##   city_name  date temperature temp_type site_name  year month
##       <int> <int>       <int>     <int>     <int> <int> <int>
## 1         0     0          34         0         0     0     0
numeric_columns <- sapply(rainfall_filtered, is.numeric)
rainfall_filtered[numeric_columns] <- lapply(rainfall_filtered[numeric_columns], function(x) {ifelse(is.na(x), 0, x)})

temperature_filtered <- na.omit(temperature_filtered)

yearly_rainfall_by_city <- aggregate(rainfall ~ city_name + year, data = rainfall_filtered, sum)
yearly_temperature_by_city <- aggregate(temperature ~ city_name + year, data = temperature_filtered, mean)

monthly_rainfall_by_city <- aggregate(rainfall ~ city_name + year + month, data = rainfall_filtered, sum)
monthly_temperature_by_city <- aggregate(temperature ~ city_name + year + month, data = temperature_filtered, mean)

rainfall_list <- split(yearly_rainfall_by_city, yearly_rainfall_by_city$city_name)
temperature_list <- split(yearly_temperature_by_city, yearly_temperature_by_city$city_name)


rainfall_ts_list <- lapply(rainfall_list, function(df) {
    start_year <- ifelse(df$city_name[1] == "Canberra", 2008, min(df$year))
    end_year <- max(df$year)
    ts(df$rainfall, start=start_year, end=end_year, frequency=1)
})


temperature_ts_list <- lapply(temperature_list, function(df) {
    ts(df$temperature, start=min(df$year), end=max(df$year), frequency=1)
})

monthly_rainfall_list <- split(monthly_rainfall_by_city, monthly_rainfall_by_city$city_name)
monthly_temperature_list <- split(monthly_temperature_by_city, monthly_temperature_by_city$city_name)


monthly_rainfall_ts_list <- lapply(monthly_rainfall_list, function(df) {
    start_year <- ifelse(df$city_name[1] == "Canberra", 2008, min(df$year))
    ts(df$rainfall, start = c(start_year, min(df$month[df$year == start_year])), frequency = 12)
})

monthly_temperature_ts_list <- lapply(monthly_temperature_list, function(df) {
    ts(df$temperature, start = c(min(df$year), min(df$month)), frequency = 12)
})

# Function to convert a time series object to a data frame
convert_ts_to_df <- function(ts_obj, city_name, value_name) {
  years <- as.integer(time(ts_obj))
  values <- as.vector(ts_obj)
  data.frame(city_name = city_name, year = years, value = values, stringsAsFactors = FALSE)
}

# Convert each time series in the list to a data frame and then combine them
rainfall_df <- do.call(rbind, lapply(names(rainfall_ts_list), function(city) {
  convert_ts_to_df(rainfall_ts_list[[city]], city, "rainfall")
}))

temperature_df <- do.call(rbind, lapply(names(temperature_ts_list), function(city) {
  convert_ts_to_df(temperature_ts_list[[city]], city, "temperature")
}))
library(stats)
library(tseries)
library(forecast)
library(tidyr)



adf_test_results_monthly_temperature <- lapply(monthly_temperature_ts_list, adf.test)
adf_test_results_monthly_temperature
## $BRISBANE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -0.33177, Lag order = 6, p-value = 0.9889
## alternative hypothesis: stationary
## 
## 
## $CANBERRA
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -0.05211, Lag order = 6, p-value = 0.99
## alternative hypothesis: stationary
## 
## 
## $MELBOURNE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -0.056618, Lag order = 6, p-value = 0.99
## alternative hypothesis: stationary
## 
## 
## $PERTH
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -0.067175, Lag order = 6, p-value = 0.99
## alternative hypothesis: stationary
## 
## 
## $SYDNEY
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -0.16611, Lag order = 6, p-value = 0.99
## alternative hypothesis: stationary
adf_test_results_monthly_rainfall <- lapply(monthly_rainfall_ts_list, adf.test)
adf_test_results_monthly_rainfall
## $BRISBANE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -3.7515, Lag order = 6, p-value = 0.02218
## alternative hypothesis: stationary
## 
## 
## $CANBERRA
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -4.0377, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
## $MELBOURNE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -6.1863, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
## $PERTH
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -1.6024, Lag order = 6, p-value = 0.743
## alternative hypothesis: stationary
## 
## 
## $SYDNEY
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -4.8967, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf_test_results_yearly_rainfall <- lapply(rainfall_ts_list, adf.test)
adf_test_results_yearly_rainfall
## $BRISBANE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -0.65474, Lag order = 2, p-value = 0.9621
## alternative hypothesis: stationary
## 
## 
## $CANBERRA
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -1.2307, Lag order = 2, p-value = 0.8655
## alternative hypothesis: stationary
## 
## 
## $MELBOURNE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -1.5964, Lag order = 2, p-value = 0.7261
## alternative hypothesis: stationary
## 
## 
## $PERTH
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -3.7968, Lag order = 2, p-value = 0.03594
## alternative hypothesis: stationary
## 
## 
## $SYDNEY
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -1.4466, Lag order = 2, p-value = 0.7832
## alternative hypothesis: stationary
adf_test_results_yearly_temperature <- lapply(temperature_ts_list, adf.test)
adf_test_results_yearly_temperature
## $BRISBANE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = 0.10806, Lag order = 2, p-value = 0.99
## alternative hypothesis: stationary
## 
## 
## $CANBERRA
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = 0.29541, Lag order = 2, p-value = 0.99
## alternative hypothesis: stationary
## 
## 
## $MELBOURNE
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -1.3562, Lag order = 2, p-value = 0.8177
## alternative hypothesis: stationary
## 
## 
## $PERTH
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = -2.9611, Lag order = 2, p-value = 0.2063
## alternative hypothesis: stationary
## 
## 
## $SYDNEY
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X[[i]]
## Dickey-Fuller = 0.14916, Lag order = 2, p-value = 0.99
## alternative hypothesis: stationary

Which relationships are evident? Are they a long-term trend or do they just exhibit seasonality?

  • Monthly Temperature Dataset (First Set of Results): Most cities (like Brisbane, Canberra, Kent, Melbourne) have high p-values (close to 0.99), suggesting non-stationarity, indicating the presence of trends in the monthly temperature data.

  • Monthly Rainfall Dataset (Second Set of Results): Some cities (like Brisbane, Canberra, Melbourne, Sydney) show low p-values (e.g., < 0.05), indicating stationarity and thus no significant trend. Other cities show high p-values, suggesting non-stationarity and potential trends.

  • Yearly Rainfall Dataset (Third Set of Results): Cities like Perth show low p-values, indicating stationarity and no significant long-term trend.Other cities have higher p-values, suggesting potential trends.

  • Yearly Temperature Dataset (Fourth Set of Results): Cities like Brisbane and Canberra show high p-values, indicating non-stationarity and possible trends. Other cities, like Perth, show lower p-values, suggesting no significant long-term trend.

Plotting

common_cities <- intersect(names(temperature_ts_list), names(rainfall_ts_list))

# Rainfall plot
rainfall_df_common <- do.call(rbind, lapply(common_cities, function(city) {
  convert_ts_to_df(rainfall_ts_list[[city]], city, "rainfall")
}))

ggplot(rainfall_df_common, aes(x = year, y = value, group = city_name, color = city_name)) +
  geom_line(lwd=1.1) +
  facet_wrap(~ city_name, scales = "free_x") +
  labs(title = "Yearly Rainfall by City (2000-2019)", x = "Year", y = "Total Rainfall") +
  first_custom() +
  theme(legend.position = "none") 

# Temperature plot
temperature_df_common <- do.call(rbind, lapply(common_cities, function(city) {
  convert_ts_to_df(temperature_ts_list[[city]], city, "temperature")
}))

ggplot(temperature_df_common, aes(x = year, y = value, group = city_name, color = city_name)) +
  geom_line(lwd=1.1) +
  facet_wrap(~ city_name, scales = "free_x") +
  labs(title = "Yearly Mean Temperature by City (2000-2019)", x = "Year", y = "Mean Temperature") +
  first_custom() +
  theme(legend.position = "none")

The plots of temperature and rainfall trends by city from 2000 to 2019 show:

Seasonal patterns are evident in both temperature and rainfall data. There’s a slight upward trend in temperatures for most cities, suggesting a warming pattern. Rainfall data is more variable with no clear long-term trend.

Predicting 100 years into the future is complex, but if current warming trends persist, temperatures are likely to continue rising. Rainfall predictions are less certain due to their variability.

Geospatial

Plotting

Geospatial data can get very complicated very quickly with

  • different coordinate systems
  • data formats like shapefile, raster, tiff
  • data grouping
  • and many different specialized packages for handling this data.

There are powerful applications like ArcGIS that specialize in processing this data. But we will be just fine with ggplot2. This week’s TidyTuesday project happens to involve geospatial data!

  • map_data()
  • geom_sf()
  • group =
  • coord_quickmap()
library(sf)
library(raster)
library(dplyr)
library(ggrepel)
library(viridis)
library(ggplot2)
library(rnaturalearth)
library(rnaturalearthhires)
# Load Australia shapefile

australia <- rnaturalearth::ne_states(geounit = "australia")
sp::plot(australia)

# Load and plot raster data
grid_raster <- raster("2019120120191231.grid")

plot(grid_raster)

climate <- as(grid_raster, "SpatialPixelsDataFrame")
climate <- as.data.frame(climate)
climate <- climate %>%
  rename(temp = X2019120120191231)

# Select and clean Australia cities data
australia_cities <- rainfall %>%
  dplyr::select(city_name, long, lat) %>%
  distinct()
australia_cities <- australia_cities[-4,]

# Create the plot
tempdec <- ggplot() +
  geom_tile(data = climate, aes(x = x, y = y, fill = temp)) +
  geom_sf(data = australia, fill = NA, color = "#3E3E3E", size = 0.1) +
  geom_point(data = australia_cities, aes(x = long, y = lat), color = "white") +
  geom_text_repel(data = australia_cities, aes(x = long, y = lat, label = city_name), size = 3, color = "white") +
  scale_fill_viridis_c(option = "B") +
  coord_sf(crs = st_crs(australia)) +
  theme_void() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.background = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    plot.caption = element_text(vjust = 1.1, hjust = 1.3, size = 7),
    legend.position = "right",
    legend.justification = "center",
    legend.box.just = "right"
  ) +
  labs(
    title = "Australia Climate",
    subtitle = "Average High Temperatures for December 2019",
    caption = "Australian Bureau of Meteorology"
  )

# Plot the result
print(tempdec)

The plot is a thematic map depicting the average high temperatures across Australia for December 2019. It employs a gradient color scheme, where warmer colors represent higher temperatures, creating a visual representation of the heat distribution. Major cities are marked with white points, and labels provide clear identification, adding a spatial context to the temperature data.


Links