Elena Reinisch, 2018-09-11
Last modified: 2018-09-14
# start with same libraries as vector tutorial
%matplotlib inline
from __future__ import (absolute_import, division, print_function)
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import folium
from folium.plugins import TimeSliderChoropleth
import numpy as np
from shapely.geometry import Point
from shapely.geometry import Polygon
import pandas as pd
import geopandas as gpd
from geopandas import GeoSeries, GeoDataFrame
%run -i load_data.py
co2_data_table.head()
We get a first look at how many unique sites we have.
len(co2_geo_data_table.Latitude.unique())
Now we determine which flux column to take based on the number of measurements.
len(co2_geo_data_table[co2_geo_data_table['CO2 Flux'].isna()])
len(co2_geo_data_table[co2_geo_data_table['CO2 Flux.1'].isna()])
Because there are more records of CO2 Flux.1, we use these values for CO2 flux.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world.head(2)
f, ax = plt.subplots(1, figsize=(12, 6))
ax.set_title('co2 site locations')
world.plot(ax=ax, edgecolor='gray')
co2_geo_data_table.plot(ax=ax, marker='.', color='r')
ax.set_ylim([-90, 90])
ax.set_axis_off()
plt.axis('equal');
plt.figure()
NA_co2_geo_data_table = co2_geo_data_table[co2_geo_data_table.Latitude.between(23, 90) & co2_geo_data_table.Longitude.between(-131, -68)]
NA_co2_geo_data_table.describe()
firstmap = folium.Map(location=[39.8283, -98.5795], tiles='Stamen Terrain', zoom_start=4)
for index, row in NA_co2_geo_data_table.iterrows():
iconcolor='blue'
if pd.isnull(row['CO2']): # no CO2 values
if pd.isnull(row['CO2 Flux.1']): # no CO2 values and no flux values
iconcolor = 'red'
marker = folium.Marker((row['Latitude'],row['Longitude']), popup="{}: no CO2 values".format(row['Site Type']),
icon=folium.Icon(color=iconcolor, icon='warning-sign'))
marker.add_to(firstmap)
else:
iconcolor = 'blue'
marker = folium.Marker((row['Latitude'],row['Longitude']), popup="{}: CO2 flux =\n {} mg C m-2 d-1".format(row['Site Type'], row['CO2 Flux.1']),
icon=folium.Icon(color=iconcolor, icon='info-sign'))
marker.add_to(firstmap)
else: # CO2 values
if pd.isnull(row['CO2 Flux.1']): # CO2 values but no flux values
iconcolor = 'purple'
marker = folium.Marker((row['Latitude'],row['Longitude']), popup="{}: CO2 =\n {} micro-atm".format(row['Site Type'], row['CO2']),
icon=folium.Icon(color=iconcolor, icon='info-sign'))
marker.add_to(firstmap)
else:
iconcolor = 'green' # both CO2 values and flux values
marker = folium.Marker((row['Latitude'],row['Longitude']), popup="{}: \n CO2 = {} micro-atm \n CO2 flux = {} mg C m-2 d-1".format(row['Site Type'], row['CO2'], row['CO2 Flux.1']),
icon=folium.Icon(color=iconcolor, icon='info-sign'))
marker.add_to(firstmap)
firstmap
Our goal is to create an interactive time series map showing locations of records over time. To do so, we will use TimeSliderChoropleth
available from folium.plugins
. In order to do so, we will first need to clean our time data. Some of the data is available in 'YYYYMMDD' format, while others are given in year only, and still others are in a format that is unrecognizable. We will work with the 'YYYYMMDD' and 'YYYY' data, but first we need to get this data into a single format. We choose 'YYYYMMDD'.
First, we look at all of the available data with date values.
co2_geo_data_table = co2_geo_data_table.copy()
co2_geo_data_table = co2_geo_data_table[~co2_geo_data_table['CO2'].isna()]
The dates are in different formats. While some dates are not identifiable, we can determine that some dates are year-only. To be able to incoporate these into our data set, we will adjust these dates to read as YYYYMMDD. We first create a new column called 'ModifiedDateTime' which contains the values of 'DateTime' converted to float64.
co2_geo_data_table['ModifiedDateTime'] = pd.Series(pd.to_numeric(co2_geo_data_table['DateTime'], errors='coerce'), dtype='int64')
co2_geo_data_table.ModifiedDateTime.fillna(0);
Now we take all the values that were recorded by year-only and convert them to 'YYYYMMDD' by multiplying by 1e4.
co2_geo_data_table.loc[co2_geo_data_table['ModifiedDateTime'] < 3000, 'ModifiedDateTime'] = co2_geo_data_table[co2_geo_data_table['ModifiedDateTime'] < 3000].ModifiedDateTime*1e4+101
We confirm that the values were changed correctly.
co2_geo_data_table[co2_geo_data_table['ModifiedDateTime'] < 3000].ModifiedDateTime
This means that there are no longer any values below 3000 (e.g., 2012). We confirm the minimum of the data set to be from the unrecognized date format.
co2_geo_data_table[co2_geo_data_table['ModifiedDateTime'] < 50000].ModifiedDateTime.min()
Lastly, we check that 2012 was correctly incorporated.
co2_geo_data_table[co2_geo_data_table['ModifiedDateTime'] > 20110000].ModifiedDateTime.min()
We can now isolate a subset of usable data to perform plot as an interactive time series using TimeSliderChoropleth
. For a great example of how to use TimeSliderChoropleth
, please see this example. For this demonstration, we use data between 2004 and 2006.
co2_geo_data_table_TS_subset = co2_geo_data_table[(co2_geo_data_table['ModifiedDateTime'] > 20010000) & (co2_geo_data_table['ModifiedDateTime'] < 20060000)]
co2_geo_data_table_TS_subset = co2_geo_data_table_TS_subset.copy()
We convert the dates in 'ModifiedDateTime' to DateTime values in order to operate on years.
co2_geo_data_table_TS_subset['ModifiedDateTime'] = pd.to_datetime(co2_geo_data_table_TS_subset['ModifiedDateTime'].astype('int64').astype('str'), yearfirst=True)
co2_geo_data_table_TS_subset.ModifiedDateTime.head()
We now build a dictionary which references our date values using DateTime indices.
datetime_index = pd.DatetimeIndex(co2_geo_data_table_TS_subset['ModifiedDateTime'])
dt_index_epochs = datetime_index.astype(int) // 10**9
dt_index = dt_index_epochs.astype('U10')
dt_index
datetime_index
Next, we build the base for a style dictionary that will change the color of our plotted records based on the level of CO2 concentration at a particular time. If there are no measurements for a pariticular date, then the opacity is set to zero.
Something that I found was that TimeSliderChoropleth
seems to rely on having regions rather than points to display the changes over time. I thus represent our data points as polygons rather than points (as recorded) in this plot.
styledata = {}
n_periods = len(dt_index)
n_sample = len(dt_index)
record_index = 0
for record in co2_geo_data_table_TS_subset.index:
co2_geo_data_table_TS_subset.geometry.at[record] = Polygon(((co2_geo_data_table_TS_subset.loc[record].Longitude, co2_data_table.loc[record].Latitude),
(co2_geo_data_table_TS_subset.loc[record].Longitude, co2_data_table.loc[record].Latitude+1),
(co2_geo_data_table_TS_subset.loc[record].Longitude+1, co2_data_table.loc[record].Latitude+1),
(co2_geo_data_table_TS_subset.loc[record].Longitude+1, co2_data_table.loc[record].Latitude),
(co2_geo_data_table_TS_subset.loc[record].Longitude, co2_data_table.loc[record].Latitude)))
df = pd.DataFrame(
{'color': co2_geo_data_table_TS_subset.CO2.get_values(),
'opacity': 0*np.random.normal(size=n_periods)},
index=dt_index)
df = df.cumsum()
df.sample(n_sample, replace=False).sort_index()
df.loc[df.index == dt_index[record_index], 'opacity'] = np.ones(len(df.index[df.index == dt_index[record_index]]))#co2_geo_data_table_TS_subset[df.index == dt_index[record]].CO2.get_values()
styledata[record_index] = df
record_index += 1
styledata.get(0).head()
We now map the colors to a predefined color map. Since the opacity values are set to be 0 or 1, they do not need to be scaled.
from branca.colormap import linear
max_color, min_color = 0, 0
for ModifiedDateTime, data in styledata.items():
max_color = max(max_color, data['color'].max())
min_color = max(min_color, data['color'].min())
cmap = linear.BuPu_06.scale(min_color, max_color)
for country, data in styledata.items():
data['color'] = data['color'].apply(cmap)
data['opacity'] = (data['opacity'])
data['opacity'] = (data['opacity'])
styledata.get(0).head()
Now we can place the style data in dictionary format.
styledict = {
str(record): data.to_dict(orient='index') for
record, data in styledata.items()
}
Our last step before plotting is to set the 'ModifiedDateTime' values to string so that we can convert to JSON format.
co2_geo_data_table_TS_subset['ModifiedDateTime'] = co2_geo_data_table_TS_subset['ModifiedDateTime'].astype('str')
We use TimeSliderChoropleth
to map the locations of measured CO2 concentration over time for our data subset. Colors correspond to levels of CO2 concentration.
It turns out that TimeSliderChoropleth
does not have a straightforward way to get a legend for your plot. To get around this, I added a choropleth layer with point color and opacity set to zero to build the legend.
m = folium.Map([-10, -55], zoom_start=5)
g = TimeSliderChoropleth(
co2_geo_data_table_TS_subset.to_json(),
styledict=styledict, overlay = True
).add_to(m)
m.choropleth(
geo_data=co2_geo_data_table_TS_subset.to_json(),
data=co2_geo_data_table_TS_subset,
columns=['ModifiedDateTime', 'CO2'],
key_on='feature.id',
fill_color= 'BuPu',
fill_opacity=0.0,
line_opacity=0.0,
legend_name='CO2 (micro-atm)'
)
folium.LayerControl().add_to(m)
m.save('TimeSeries_CO2_2004to2006.html')
m