I’ve been wanting to do some analysis on some weather data for a while. Mostly to see whether (pun not intended) there are any longer term trends that are observable from figures recorded by the Bureau of Meteorology.

There are a lot of historic stations, and a lot of data that could be assessed. To keep things contained I’ve decided to look at a single city, albeit built from several recording weather stations due to the long term availability of the data.

The data was sourced from the Bureau of Meteorology Climate Data website. I’ve downloaded rainfall data for Brisbane (station 40913), Eagle Farm (station 40212) and Toowong (station 40245), as well as temperature data for Brisbane (station 40913), Brisbane Regional Office (station 40214 [closed]) and Brisbane Aero (station 40223 [closed])

The data analysis

library(tidyverse)

tmp <- list.files("../../data/bneweather/", pattern = "*.csv", full.names = TRUE)
file.names <- list.files("../../data/bneweather/", pattern = "*.csv")
list2env(lapply(setNames(tmp, make.names(gsub("*.csv$", "", file.names))), 
                read_csv), envir = .GlobalEnv)
## <environment: R_GlobalEnv>
BOM_MaxTemperature_Brisbane_ID040913
## # A tibble: 6,561 x 8
##    `Product code` `Bureau of Meteoro…  Year Month Day   `Maximum temperat…
##    <chr>          <chr>               <int> <chr> <chr>              <dbl>
##  1 IDCJAC0010     040913               1999 01    01                    NA
##  2 IDCJAC0010     040913               1999 01    02                    NA
##  3 IDCJAC0010     040913               1999 01    03                    NA
##  4 IDCJAC0010     040913               1999 01    04                    NA
##  5 IDCJAC0010     040913               1999 01    05                    NA
##  6 IDCJAC0010     040913               1999 01    06                    NA
##  7 IDCJAC0010     040913               1999 01    07                    NA
##  8 IDCJAC0010     040913               1999 01    08                    NA
##  9 IDCJAC0010     040913               1999 01    09                    NA
## 10 IDCJAC0010     040913               1999 01    10                    NA
## # ... with 6,551 more rows, and 2 more variables: `Days of accumulation of
## #   maximum temperature` <int>, Quality <chr>
BOM_Rainfall_Brisbane_ID040913
## # A tibble: 6,562 x 8
##    `Product code` `Bureau of Meteoro…  Year Month Day   `Rainfall amount …
##    <chr>          <chr>               <int> <chr> <chr>              <dbl>
##  1 IDCJAC0009     040913               1999 01    01                    NA
##  2 IDCJAC0009     040913               1999 01    02                    NA
##  3 IDCJAC0009     040913               1999 01    03                    NA
##  4 IDCJAC0009     040913               1999 01    04                    NA
##  5 IDCJAC0009     040913               1999 01    05                    NA
##  6 IDCJAC0009     040913               1999 01    06                    NA
##  7 IDCJAC0009     040913               1999 01    07                    NA
##  8 IDCJAC0009     040913               1999 01    08                    NA
##  9 IDCJAC0009     040913               1999 01    09                    NA
## 10 IDCJAC0009     040913               1999 01    10                    NA
## # ... with 6,552 more rows, and 2 more variables: `Period over which
## #   rainfall was measured (days)` <int>, Quality <chr>

Initial Plots

Lets see a simple plot of the rainfall data.

tbl1 <- filter(BOM_Rainfall_Brisbane_ID040913, 
              `Rainfall amount (millimetres)` > 0) %>% 
  rename(Rainfall = `Rainfall amount (millimetres)`, 
         Station_num = `Bureau of Meteorology station number`) %>% 
  mutate(Date = as.Date(paste(Year, Month, Day, sep = "-")),
         Station_name = "Brisbane") %>% 
  select(Station_num, Station_name, Year, Month, Day, Date, Rainfall)

ggplot(tbl1, aes(Date, Rainfall)) + 
  xlab("") +
  geom_point(colour = tdc_blue, size = .5) +
  geom_col(fill = tdc_blue, width = 1) + 
  theme_tdc()

tbl1 <- filter(BOM_MaxTemperature_Brisbane_ID040913, 
               !is.na(`Maximum temperature (Degree C)`)) %>% 
  rename(Max_temp = `Maximum temperature (Degree C)`, 
         Station_num = `Bureau of Meteorology station number`) %>% 
  mutate(Date = as.Date(paste(Year, Month, Day, sep = "-")),
         Station_name = "Brisbane") %>% 
  select(Station_num, Station_name, Year, Month, Day, Date, Max_temp)

ggplot(tbl1, aes(Date, Max_temp)) + 
  labs(title = "Daily Maximum Temperature for Brisbane, Australia",
       subtitle = "Source: Bureau of Meteorology stations 40214, 40223 & 40913",
       x = "", y = "Maximum Temperature (degrees C)") +
  geom_line(color = tdc_blue, size = .1) +
  theme_tdc()

Data Wrangling

Lets now combine our data sets. Our data sets do overlap so I’m going to choose an arbitrary points at which to change from one weather station to another. For temperature I have decided to use:

  • Brisbane Region Office (station 40214): 1900 - 1959
  • Brisbane Aero (station 40223): 1960 - 1999
  • Brisbane (station 40913): 2000 - current

For rainfall I have decided to use:

  • Toowong (station 40245): 1900 - 1999
  • Brisbane (station 40913): 2000 - current
max_temp_bro <- filter(BOM_MaxTemperature_BrisbaneRO_ID040214, 
                       Year >= 1900 & Year <= 1959) %>% 
  mutate(Station_name = "Brisbane Regional Office")
max_temp_ba <- filter(BOM_MaxTemperature_BrisbaneAero_ID040223, 
                      Year >= 1960 & Year <= 1999)  %>% 
  mutate(Station_name = "Brisbane Aero")
max_temp_bne <- filter(BOM_MaxTemperature_Brisbane_ID040913, Year >= 2000)  %>% 
  mutate(Station_name = "Brisbane")

max_temp <- bind_rows(max_temp_bro, max_temp_ba)
max_temp <- bind_rows(max_temp, max_temp_bne)

max_temp <- rename(max_temp, Value = `Maximum temperature (Degree C)`, 
                   Station_num = `Bureau of Meteorology station number`) %>% 
  mutate(Date = as.Date(paste(Year, Month, Day, sep = "-")),
         Data_type = "Maximum Temperature") %>% 
  select(Station_num, Station_name, Year, Month, Day, Date, Data_type, Value)

min_temp_bro <- filter(BOM_MinTemperature_BrisbaneRO_ID040214, 
                       Year >= 1900 & Year <= 1959) %>% 
  mutate(Station_name = "Brisbane Regional Office")
min_temp_ba <- filter(BOM_MinTemperature_BrisbaneAero_ID040223,  
                      Year >= 1960 & Year <= 1999)  %>% 
  mutate(Station_name = "Brisbane Aero")
min_temp_bne <- filter(BOM_MinTemperature_Brisbane_ID040913, Year >= 2000)  %>% 
  mutate(Station_name = "Brisbane")

min_temp <- bind_rows(min_temp_bro, min_temp_ba)
min_temp <- bind_rows(min_temp, min_temp_bne)

min_temp <- rename(min_temp, Value = `Minimum temperature (Degree C)`, 
                   Station_num = `Bureau of Meteorology station number`) %>% 
  mutate(Date = as.Date(paste(Year, Month, Day, sep = "-")),
         Data_type = "Minimum Temperature") %>% 
  select(Station_num, Station_name, Year, Month, Day, Date, Data_type, Value)

rainfall_twng <- filter(BOM_Rainfall_Toowong_ID40245, 
                        Year >= 1900 & Year <= 1999)  %>% 
  mutate(Station_name = "Toowong Bowls Club")
rainfall_bne <- filter(BOM_Rainfall_Brisbane_ID040913, Year >= 2000)  %>% 
  mutate(Station_name = "Brisbane")

rainfall <- bind_rows(rainfall_twng, rainfall_bne)

rainfall <- rename(rainfall, Value = `Rainfall amount (millimetres)`, 
                   Station_num = `Bureau of Meteorology station number`) %>% 
  mutate(Date = as.Date(paste(Year, Month, Day, sep = "-")),
         Data_type = "Rainfall (mm)") %>% 
  select(Station_num, Station_name, Year, Month, Day, Date, Data_type, Value)

weather <- bind_rows(max_temp, min_temp)
weather <- bind_rows(weather, rainfall)

Now that our data is well structured or tidy we can start plotting some different things. Firstly, lets have a look at the daily temperature ranges since 2000.

tbl1 <- select(weather, Year, Month, Day, Date, Data_type, Value) %>% 
  filter(Year > 2000) %>% 
  spread(Data_type, Value)

ggplot(tbl1, aes(Date)) +
labs(title = "Daily temperature ranges for Brisbane, Australia",
     subtitle = "Source: Bureau of Meteorology stations 40214, 40223 & 40913",
     y = "Temperature Range (degrees C)", x = "") + 
  geom_ribbon(aes(ymin = `Minimum Temperature`, ymax = `Maximum Temperature`), 
              fill = tdc_blue, alpha = .5) +
  theme_tdc()

We have data as far back as 1900, however looking at individual daily data points is likely to be a little unwieldy and won’t be likely to show us much more than what we already see. Instead lets look to take the mean monthly high and low temperatures and have a look at their ranges.

tbl1 <- select(weather, Year, Month, Data_type, Value) %>% 
  mutate(Date = as.Date(paste(Year, Month, "01", sep = "-"))) %>% 
  filter(Year > 1900) %>% 
  group_by(Year, Month, Date, Data_type) %>% 
  summarise(Value = mean(Value, na.rm = T)) %>% 
  spread(Data_type, Value)

ggplot(tbl1, aes(Date)) +
labs(title = "Average monthly temperature ranges for Brisbane, Australia",
     subtitle = "Source: Bureau of Meteorology stations 40214, 40223 & 40913",
     y = "Temperature Range (degrees C)", x = "") + 
  geom_ribbon(aes(ymin = `Minimum Temperature`, ymax = `Maximum Temperature`), 
              fill = tdc_blue, alpha = .5) +
  theme_tdc()

We can see in the chart above the seasonality of each month, but we can’t really tell if temperatures have been rising or not over an extended period of time. It makes sense that we might try and put a linear model (lm) through this to see whether there is any change over time or not. Given we’re working with high and low temperatures we’ll need to include two smoothers.

ggplot(tbl1, aes(Date)) +
  labs(title = "Average monthly temperature ranges for Brisbane, Australia",
     subtitle = "Source: Bureau of Meteorology stations 40214, 40223 & 40913",
     y = "Temperature Range (degrees C)", x = "") + 
  geom_ribbon(aes(ymin = `Minimum Temperature`, ymax = `Maximum Temperature`), 
              fill = tdc_blue, alpha = .2) +
  geom_smooth(aes(y = `Maximum Temperature`), method = "lm", fill = tdc_blue) +
  geom_smooth(aes(y = `Minimum Temperature`), method = "lm", fill = tdc_blue) +
  theme_tdc()

It looks like the maximum temperature has increased slightly over the last 100 years. A loess model shows a curve with a slight trough in the middle, however the standard error is much larger than with a linear model. We can also observe an increase in average monthly temperature for the minimum temperature too (a slightly higher rate than the maximum temperature).

Based on this it appears that the average temperature variance per month is shrinking. This is the average monthly maximum temperature minus the average monthly minimum temperature. The chart below shows the monthly average monthly temperature variance, with a linear model plotted over the top. It certainly would appear that the variability in temperature has been declining.

tbl1 <- mutate(tbl1, Range_Variance = `Maximum Temperature` - `Minimum Temperature`)

ggplot(tbl1, aes(Date)) +
labs(title = "Average monthly temperature variance for Brisbane, Australia",
     subtitle = "Source: Bureau of Meteorology stations 40214, 40223 & 40913",
     y = "Temperature Variance (degrees C)", x = "") + 
  geom_line(aes(y = Range_Variance), colour = tdc_blue, alpha = .5) + 
  geom_smooth(aes(y = Range_Variance), method = "lm", fill = tdc_blue) +
  theme_tdc()

Conclusion

While it is difficult to say conlusively that temperatures have been rising over the past 100 years, it certainly does appear that way. It also seems as though we’re getting less variability between our low and high temperatures each month. It would be interesting to take the time to redo this analysis for another city or two, possibly taking the time to look at a few capital cities.

Appendix

I wanted to have a trial at a few different and interesting ways to visualise the weather data over the course of a few years. Below are a few concepts. Let me know your thoughts in the comments.

tbl1 <- weather %>% 
  filter(Year > 2002 & Data_type == "Rainfall (mm)" & Value > 0) %>% 
  group_by(Year, Month) %>% 
  summarise(Value = sum(Value))

ggplot(tbl1, aes(Month, Value)) +
  labs(title = "Monthly Rainfall Ammounts",
       subtitle = "Source: Bureau of Meteorology stations 40245 & 40913",
       x = "", y = "Rainfall (mm)") + 
  geom_col(fill = tdc_blue, width = 0.3) +
  coord_polar() + 
  facet_wrap(~ Year, ncol = 7) +
  scale_x_discrete(label = month.abb) +
  theme_tdc() +
  theme(axis.text.x = element_text(angle = 0, hjust = 1, vjust = 0.5))

tbl1 <- weather %>% 
  filter(Year > 1900 & Data_type == "Rainfall (mm)" & Value > 0) %>% 
  group_by(Year, Month) %>% 
  summarise(Value = sum(Value))

ggplot(tbl1, aes(Month, Value)) +
  labs(title = "Monthly Rainfall Ammounts since 1900",
       subtitle = "Source: Bureau of Meteorology stations 40245 & 40913",
       x = "", y = "Rainfall (mm)") + 
  geom_line(colour = tdc_blue, alpha = .2) +
  geom_point(colour = tdc_blue, alpha = .2) +
  aes(group = Year) +
  scale_x_discrete(label = month.abb) +
  theme_tdc() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
        legend.position = "none")

tbl1 <- weather %>% 
  filter(Data_type != "Rainfall (mm)" & !is.na(Value)) %>% 
  group_by(Year, Month) %>% 
  summarise(Max_Temp = max(Value[Data_type == "Maximum Temperature"]),
            Min_Temp = min(Value[Data_type == "Minimum Temperature"])) %>% 
  gather("Data_Type", "Value", -Year, -Month)

ggplot(tbl1, mapping = aes(Month, Value, colour = Data_Type)) +
  labs(title = "Maximum & minimum monthly temperatures since 1900",
       subtitle = "Source: Bureau of Meteorology stations 40214, 40223 & 40913",
       x = "", y = "Temperature (c)") +
  geom_line(data = filter(tbl1, Data_Type == "Max_Temp"), alpha = 0.25) +
  geom_point(alpha = 0.3) +
  geom_line(data = filter(tbl1, Data_Type == "Min_Temp"), alpha = 0.25) +
  aes(group = Year) +
  scale_color_manual(values = tdc_palette[2:1]) +
  scale_x_discrete(label = month.abb) +
  theme_tdc() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
        legend.position = "bottom")