Sep 29, 2021
All lecture slides will work on Binder, data is available to be loaded, etc. Binder has an exact copy of the file structure in the GitHub repo.
An interface just like the pandas
plot() function, but much more useful.
# Our usual imports
import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
from matplotlib import pyplot as plt
# This will add the .hvplot() function to your DataFrame!
import hvplot.pandas
# Import holoviews too
import holoviews as hv
Example: Interactive choropleth of median property assessments in Philadelphia
# Load the data
url = "https://raw.githubusercontent.com/MUSA-550-Fall-2021/week-3/main/data/opa_residential.csv"
data = pd.read_csv(url)
# Create the Point() objects
data['geometry'] = gpd.points_from_xy(data['lng'], data['lat'])
# Create the GeoDataFrame
data = gpd.GeoDataFrame(data, geometry='geometry', crs="EPSG:4326")
# load the Zillow data from GitHub
url = "https://raw.githubusercontent.com/MUSA-550-Fall-2021/week-3/main/data/zillow_neighborhoods.geojson"
zillow = gpd.read_file(url)
# Important: Make sure the CRS match
data = data.to_crs(zillow.crs)
# perform the spatial join
data = gpd.sjoin(data, zillow, op='within', how='left')
# Calculate the median market value per Zillow neighborhood
median_values = data.groupby("ZillowName", as_index=False)["market_value"].median()
# Merge median values with the Zillow geometries
median_values = zillow.merge(median_values, on="ZillowName")
print(type(median_values))
median_values.head()
Let's add a tile source underneath the choropleth map
import geoviews as gv
import geoviews.tile_sources as gvts
%%opts WMTS [width=800, height=800, xaxis=None, yaxis=None]
choro = median_values.hvplot(c='market_value',
width=500,
height=400,
alpha=0.5,
geo=True,
cmap='viridis',
hover_cols=['ZillowName'])
gvts.ESRI * choro
The spatial distance that a single pixel covers on the ground.
The bounding box that covers the entire extent of the raster data.
Pixel space --> real space
Each band measures the light reflected from different parts of the electromagnetic spectrum.
.tif
image format with additional spatial information embedded in the file, which includes metadata for: NoDataValue
)rasterio
We'll use rasterio
for the majority of our raster analysis
Analysis is built around the "open()" command which returns a "dataset" or "file handle"
import rasterio as rio
# Open the file and get a file "handle"
landsat = rio.open("./data/landsat8_philly.tif")
landsat
# The CRS
landsat.crs
# The bounds
landsat.bounds
# The number of bands available
landsat.count
# The band numbers that are available
landsat.indexes
# Number of pixels in the x and y directions
landsat.shape
# The 6 parameters that map from pixel to real space
landsat.transform
# All of the meta data
landsat.meta
Tell the file which band to read, starting with 1
# This is just a numpy array
data = landsat.read(1)
data
We can plot it with matplotlib's imshow
fig, ax = plt.subplots(figsize=(10,10))
img = ax.imshow(data)
plt.colorbar(img)
Note that imshow needs a bounding box of the form: (left, right, bottom, top).
See the documentation for imshow
if you forget the right ordering...(I often do!)
Matplotlib has a built in log normalization...
import matplotlib.colors as mcolors
# Initialize
fig, ax = plt.subplots(figsize=(10, 10))
# Plot the image
img = ax.imshow(
data,
norm=mcolors.LogNorm(), # Use a log colorbar scale
extent=[ # Set the extent of the images
landsat.bounds.left,
landsat.bounds.right,
landsat.bounds.bottom,
landsat.bounds.top,
],
)
# Add a colorbar
plt.colorbar(img)
Two requirements:
city_limits = gpd.read_file("./data/City_Limits.geojson")
# Convert to the correct CRS!
print(landsat.crs.data)
city_limits = city_limits.to_crs(landsat.crs.data['init'])
# Initialize
fig, ax = plt.subplots(figsize=(10, 10))
# The extent of the data
landsat_extent = [
landsat.bounds.left,
landsat.bounds.right,
landsat.bounds.bottom,
landsat.bounds.top,
]
# Plot!
img = ax.imshow(data,
norm=mcolors.LogNorm(), #NEW
extent=landsat_extent) #NEW
# Add the city limits
city_limits.plot(ax=ax, facecolor="none", edgecolor="white")
# Add a colorbar and turn off axis lines
plt.colorbar(img)
ax.set_axis_off()
Landsat 8 has 11 bands spanning the electromagnetic spectrum from visible to infrared.
First let's read the red, blue, and green bands (bands 4, 3, 2):
The .indexes
attributes tells us the bands available to be read.
Important: they are not zero-indexed!
landsat.indexes
rgb_data = landsat.read([4, 3, 2])
rgb_data.shape
Note the data has the shape: (bands, height, width)
data[2]
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 6))
cmaps = ["Reds", "Greens", "Blues"]
for i in [0, 1, 2]:
# This subplot axes
ax = axs[i]
# Plot
ax.imshow(rgb_data[i], norm=mcolors.LogNorm(), extent=landsat_extent, cmap=cmaps[i])
city_limits.plot(ax=ax, facecolor="none", edgecolor="black")
# Format
ax.set_axis_off()
ax.set_title(cmaps[i][:-1])
You can specify the subplot layout via the nrows
and ncols
keywords to plt.subplots
. If you have more than one row or column, the function returns:
fig, axs = plt.subplots(nrows=1, ncols=3)
When nrows or ncols > 1, I usually name the returned axes variable "axs" vs. the usual case of just "ax". It's useful for remembering we got more than one Axes object back!
# This has length 3 for each of the 3 columns!
axs
# Left axes
axs[0]
# Middle axes
axs[1]
# Right axes
axs[2]
earthpy
to plot the combined color image¶A useful utility package to perform the proper re-scaling of pixel values
import earthpy.plot as ep
# Initialize
fig, ax = plt.subplots(figsize=(10,10))
# Plot the RGB bands
ep.plot_rgb(rgb_data, rgb=(0, 1, 2), ax=ax);
# Initialize
fig, ax = plt.subplots(figsize=(10,10))
# Plot the RGB bands
ep.plot_rgb(rgb_data, rgb=(0, 1, 2), ax=ax, stretch=True);
Can we do better?
import xarray as xr
xr.open_rasterio?
ds = xr.open_rasterio("./data/landsat8_philly.tif")
ds
type(ds)
Still experimental, but very promising — interactive plots with widgets for each band!
import hvplot.xarray
img = ds.hvplot.image(width=700, height=500, cmap='viridis')
img
Use rasterio's builtin mask function:
from rasterio.mask import mask
masked, mask_transform = mask(
dataset=landsat,
shapes=city_limits.geometry,
crop=True, # remove pixels not within boundary
all_touched=True, # get all pixels that touch the boudnary
filled=False, # do not fill cropped pixels with a default value
)
masked
masked.shape
# Initialize
fig, ax = plt.subplots(figsize=(10, 10))
# Plot the first band
ax.imshow(masked[0], cmap="viridis", extent=landsat_extent)
# Format and add the city limits
city_limits.boundary.plot(ax=ax, color="gray", linewidth=4)
ax.set_axis_off()
Let's save our cropped raster data. To save raster data, we need both the array of the data and the proper meta-data.
out_meta = landsat.meta
out_meta.update(
{"height": masked.shape[1], "width": masked.shape[2], "transform": mask_transform}
)
print(out_meta)
# write small image to local Geotiff file
with rio.open("data/cropped_landsat.tif", "w", **out_meta) as dst:
dst.write(masked)
# Can't access
landsat
Note: we used a context manager above (the "with" statement) to handle the opening and subsequent closing of our new file.
For more info, see this tutorial.
Often called "map algebra"...
Formula:
NDVI = (NIR - Red) / (NIR + Red)
Healthy vegetation reflects NIR and absorbs visible, so the NDVI for green vegatation
NIR is band 5 and red is band 4
masked.shape
# Note that the indexing here is zero-based, e.g., band 1 is index 0
red = masked[3]
nir = masked[4]
def calculate_NDVI(nir, red):
"""
Calculate the NDVI from the NIR and red landsat bands
"""
# Convert to floats
nir = nir.astype(float)
red = red.astype(float)
# Get valid entries
check = np.logical_and( red.mask == False, nir.mask == False )
# Where the check is True, return the NDVI, else return NaN
ndvi = np.where(check, (nir - red ) / ( nir + red ), np.nan )
return ndvi
NDVI = calculate_NDVI(nir, red)
fig, ax = plt.subplots(figsize=(10,10))
# Plot NDVI
img = ax.imshow(NDVI, extent=landsat_extent)
# Format and plot city limits
city_limits.plot(ax=ax, edgecolor='gray', facecolor='none', linewidth=4)
plt.colorbar(img)
ax.set_axis_off()
ax.set_title("NDVI in Philadelphia", fontsize=18);
# Read in the parks dataset
parks = gpd.read_file('./data/parks.geojson')
# Print out the CRS
parks.crs
landsat.crs
landsat.crs.data['init']
# Conver to landsat CRS
parks = parks.to_crs(landsat.crs.data['init'])
#parks = parks.to_crs(epsg=32618)
fig, ax = plt.subplots(figsize=(10,10))
# Plot NDVI
img = ax.imshow(NDVI, extent=landsat_extent)
# NEW: add the parks
parks.plot(ax=ax, edgecolor='crimson', facecolor='none', linewidth=2)
# Format and plot city limits
city_limits.plot(ax=ax, edgecolor='gray', facecolor='none', linewidth=4)
plt.colorbar(img)
ax.set_axis_off()
ax.set_title("NDVI vs. Parks in Philadelphia", fontsize=18);
It looks like it worked pretty well!
This is called zonal statistics. We can use the rasterstats
package to do this...
from rasterstats import zonal_stats
stats = zonal_stats(parks, NDVI, affine=landsat.transform, stats=['mean', 'median'])
zonal_stats
function¶Zonal statistics of raster values aggregated to vector geometries.
stats
len(stats)
len(parks)
# Store the median value in the parks data frame
parks['median_NDVI'] = [s['median'] for s in stats]
parks.head()
They are all positive, indicating an abundance of green vegetation...
# Initialize
fig, ax = plt.subplots(figsize=(8,6))
# Plot a quick histogram
ax.hist(parks['median_NDVI'], bins='auto')
ax.axvline(x=0, c='k', lw=2)
# Format
ax.set_xlabel("Median NDVI", fontsize=18)
ax.set_ylabel("Number of Parks", fontsize=18);
# Initialize
fig, ax = plt.subplots(figsize=(10,10))
# Plot the city limits
city_limits.plot(ax=ax, edgecolor='black', facecolor='none', linewidth=4)
# Plot the median NDVI
parks.plot(column='median_NDVI', legend=True, ax=ax, cmap='viridis')
# Format
ax.set_axis_off()
Make sure to check the crs!
parks.crs
The CRS is "EPSG:32618" and since it's not in 4326, we'll need to specify the "crs=" keyword when plotting!
# trim to only the columns we want to plot
cols = ["median_NDVI", "SITE_NAME", "geometry"]
# Plot the parks colored by median NDVI
p = parks[cols].hvplot.polygons(c="median_NDVI",
geo=True,
crs=32618,
cmap='viridis',
hover_cols=["SITE_NAME"])
# Plot the city limit boundary
cl = city_limits.hvplot.polygons(
geo=True,
crs=32618,
alpha=0,
line_alpha=1,
line_color="black",
hover=False,
width=700,
height=600,
)
# combine!
cl * p
You're welcome to use matplotlib/geopandas, but encouraged to explore the new hvplot options!
landsat.crs.data['init']
# Load the neighborhoods
hoods = gpd.read_file("./data/zillow_neighborhoods.geojson")
# Convert to the landsat
hoods = hoods.to_crs(landsat.crs.data['init'])
# Calculate the zonal statistics
stats_by_hood = zonal_stats(hoods, NDVI, affine=landsat.transform, stats=['mean', 'median'])
stats_by_hood
# Store the median value in the neighborhood data frame
hoods['median_NDVI'] = [s['median'] for s in stats_by_hood]
hoods.head()
# Initialize
fig, ax = plt.subplots(figsize=(8,6))
# Plot a quick histogram
ax.hist(hoods['median_NDVI'], bins='auto')
ax.axvline(x=0, c='k', lw=2)
# Format
ax.set_xlabel("Median NDVI", fontsize=18)
ax.set_ylabel("Number of Neighborhoods", fontsize=18);
# Initialize
fig, ax = plt.subplots(figsize=(10,10))
# Plot the city limits
city_limits.plot(ax=ax, edgecolor='black', facecolor='none', linewidth=4)
# Plot the median NDVI
hoods.plot(column='median_NDVI', legend=True, ax=ax)
# Format
ax.set_axis_off()
With hvplot:
# trim to only the columns we want to plot
cols = ["median_NDVI", "ZillowName", "geometry"]
# Plot the parks colored by median NDVI
hoods[cols].hvplot.polygons(c="median_NDVI",
geo=True,
crs=32618,
width=700,
height=600,
cmap='viridis',
hover_cols=["ZillowName"])
With altair:
# trim to only the columns we want to plot
cols = ["median_NDVI", "ZillowName", "geometry"]
(
alt.Chart(hoods[cols].to_crs(epsg=4326))
.mark_geoshape()
.encode(
color=alt.Color(
"median_NDVI",
scale=alt.Scale(scheme="viridis"),
legend=alt.Legend(title="Median NDVI"),
),
tooltip=["ZillowName", "median_NDVI"],
)
)
# calculate the top 20
cols = ['ZillowName', 'median_NDVI']
top20 = hoods[cols].sort_values('median_NDVI', ascending=False).head(n=20)
top20.hvplot.bar(x='ZillowName', y='median_NDVI', rot=90)
With altair:
(
alt.Chart(top20)
.mark_bar()
.encode(x=alt.X("ZillowName", sort="-y"), # Sort in descending order by y
y="median_NDVI",
tooltip=['ZillowName', 'median_NDVI'])
.properties(width=800)
)
cols = ['ZillowName', 'median_NDVI']
bottom20 = hoods[cols].sort_values('median_NDVI', ascending=True).tail(n=20)
bottom20.hvplot.bar(x='ZillowName', y='median_NDVI', rot=90)
With altair:
(
alt.Chart(bottom20)
.mark_bar()
.encode(x=alt.X("ZillowName", sort="y"), # Sort in ascending order by y
y="median_NDVI",
tooltip=['ZillowName', 'median_NDVI'])
.properties(width=800)
)