Download groundwater data from USGS using:
library(tidyverse)
library(sp)
library(geojsonio)
library(dataRetrieval) # USGS-R
library(lubridate)
library(stringr)
library(scales)
library(dygraphs)
library(xts)
library(DT)
library(leaflet)
library(htmltools)
library(RColorBrewer)
library(geojsonio)
regions_json = 'data/regions.geojson'
gw_data_csv = 'data/waterservices.usgs.gov/usgs_groundwater_data.csv'
gw_layer_csv = 'layers/groundwater_scores.csv'
#if (!file.exists(gw_data_csv)){
# read regions from geojson file
regions = geojson_read(regions_json, what='sp')
# get sites in bounding box of regions having groundwater data
bb = bbox(regions) %>% as.vector() %>% round(digits=7)
sites <- whatNWISsites(bBox=bb) %>% as_tibble()
# get points from sites
pts = sites
coordinates(pts)= ~ dec_long_va + dec_lat_va
proj4string(pts) = proj4string(regions)
# find region for sites
site_rgn = over(pts, regions)
# filter points and save
pts@data = pts@data %>%
cbind(site_rgn)
pts = subset(pts, !is.na(region_id))
geojson_write(pts, file='data/usgs_stations.geojson', overwrite=T, verbose=F)
# join overlapping region to sites
sites = sites %>%
cbind(site_rgn) %>%
as_tibble() %>%
filter(!is.na(region_id)) %>%
mutate(
site_no = as.double(site_no))
# get site data, paging through readNWISgwl() results which are limited by 2K characters in URL
i_seq = c(seq(1, nrow(sites), by=100), nrow(sites))
for (i in 1:(length(i_seq)-1)){ # i=2
i_beg = i_seq[i]
i_end = i_seq[i+1]-1
#cat(sprintf('%02d: %d - %d\n', i, i_beg, i_end))
d_i = readNWISgwl(sites$site_no[i_beg:i_end], convertType=F)
if (i == 1){
d = d_i
} else {
d = bind_rows(d, d_i)
}
}
# add year, join sites, filter NAs, write to csv
d %>%
as_tibble() %>%
mutate(
site_no = as.double(site_no),
year = as.integer(str_sub(lev_dt, 1, 4)),
lev_va = as.double(lev_va)) %>%
left_join(
sites, by=c('site_no','agency_cd','site_tp_cd')) %>%
filter(!is.na(lev_va)) %>% # 170,730 rows of data: region_id, site_no, year, lev_dt, lev_va
write_csv(gw_data_csv) # 24.3 MB
#}
# read in groundwater data and look at first 100 rows
gw_data = read_csv(gw_data_csv)
gw_data %>%
head(100) %>%
datatable()
The rows above are just the first 100
rows from all the station data having 175,054 rows total. Here’s the locations of all the stations as clustered markers:
#if (!file.exists(gw_layer_csv)){
# read in data
d = read_csv(gw_data_csv) %>%
# summarize by site and year
group_by(region_id, city, site_no, year) %>%
summarize(
lev = mean(lev_va)) %>%
# summarize across sites by year
group_by(region_id, city, site_no) %>%
mutate(
year_n = n(),
year_beg = min(year),
year_end = max(year),
gw_score = rescale(1/lev)*100) %>% # rescale inverse of ft below surface, 0 to 100
filter(
year_n >= 5, # ensure at least 5 years of data
year_end == year(now())) %>% # ensure has data for 2017
ungroup() %>%
group_by(region_id, city, year) %>%
summarize(
gw_score = mean(gw_score)) # average score across sites
write_csv(d, gw_layer_csv)
#}
read_csv(gw_layer_csv) %>%
datatable()
gw = read_csv(gw_layer_csv)
gw_w = gw %>%
select(-region_id) %>%
spread(city, gw_score)
gw_w %>%
select(-year) %>%
as.xts(., order.by=ymd(sprintf('%d-01-01', gw_w$year))) %>%
dygraph(main = "Groundwater Score") %>%
dyOptions(
strokeWidth = 2,
colors = brewer.pal(ncol(gw_w)-1, 'Spectral'))
# read regions from geojson and join to latest groundwater scores
regions = geojson_read(regions_json, what='sp')
regions@data = regions@data %>%
left_join(
read_csv(gw_layer_csv) %>%
filter(year==year(now())),
by=c('city','region_id'))
# setup color palette and labels
pal = colorNumeric('Spectral', regions$gw_score, reverse=F)
labels <- sprintf(
"<strong>%s</strong><br/>groundwater score: %g",
regions$city, regions$gw_score) %>%
lapply(HTML)
# show interactive map
leaflet(regions) %>%
# http://leaflet-extras.github.io/leaflet-providers/preview/
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons(
fillColor = ~pal(gw_score),
fillOpacity = 0.5,
color = ~pal(gw_score),
opacity = 0.7,
weight = 2,
highlight = highlightOptions(
fillOpacity = 0.7,
weight = 5,
opacity = 0.9,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(pal = pal, values = ~gw_score, opacity = 0.7, title = 'groundwater score',
position = "topleft")