Current 1 Library Import February 17, 2016

advertisement
Comparing Automated Forecasts of Rainfall Probabilities to the BoM’s Published Forecasts
Current
February 17, 2016
1
Library Import
In [1]: import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap
from midget.dataviews.adam import obs
from xarray import DataArray
from midget.dataviews import gfe
import datetime as datetime
from netCDF4 import Dataset
from pylab import rcParams
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.colors as colors
%matplotlib inline
rcParams[’figure.figsize’] = (16,12)
2 Data Import
2.1 Station Data
Code taken from the notebook “20160120 - Precip-type forecasts and observations retrieved.ipynb”.
# In [2]:
# Display names for the groups of stations, keyed by the short group code.
station_group_names = {
    'VIC': 'Victoria',
    'V_Melb': 'Melbourne (Olympic Park)',
    'V_Melb_Gp': 'Melbourne Area',
    'V_1': 'Gippsland (VIC)',
    'V_2': 'Inland(VIC)',
    'V_3': 'Central (VIC)',
    'V_4': 'Southwest (VIC)',
    'V_Coast': 'Coastal Stations to 5km (VIC)',
    'V_Mtn': 'Mountainous Stations (VIC)',
    # "Other" means not coastal or m... (original comment is cut off here;
    # presumably "mountainous" -- TODO confirm)
    'V_Flat_1': 'Other Stations, Inland (VIC)',
    'V_Flat_2': 'Other Stations, Coastal Districts (VIC)',
}
# In [3]:
def filter_stations_by_district(station_data, districts):
    """Return the station numbers that lie in the given forecast districts.

    Args:
        station_data: pandas DataFrame with a ``station_number`` column and a
            ``forecast_district`` column.
        districts: list of forecast-district codes of interest.

    Returns:
        A set of the ``station_number`` values of every row whose
        ``forecast_district`` is one of ``districts``; empty when nothing
        matches. Only station numbers are returned (no lat/lon data).
    """
    in_district = station_data["forecast_district"].isin(districts)
    # Building a set removes duplicate station numbers and lets callers
    # combine groups with the set operators | and &.
    return set(station_data.loc[in_district, "station_number"])
# In [4]:
def filter_stations_by_type(station_data, types):
    """Return the station numbers whose category is one of ``types``.

    Args:
        station_data: pandas DataFrame with a ``station_number`` column and a
            ``Category`` column.
        types: list of ``Category`` values of interest.

    Returns:
        A set of the ``station_number`` values of every row whose
        ``Category`` is one of ``types``; empty when nothing matches.
    """
    of_type = station_data["Category"].isin(types)
    # Building a set removes duplicate station numbers and lets callers
    # combine groups with the set operators | and &.
    return set(station_data.loc[of_type, "station_number"])
# In [5]:
def filter_stations_by_region(station_data, types):
    """Return the station numbers whose region is one of ``types``.

    Args:
        station_data: pandas DataFrame with a ``station_number`` column and a
            ``REGION`` column.
        types: list of ``REGION`` values of interest (the parameter keeps the
            name ``types`` for compatibility with existing callers).

    Returns:
        A set of the ``station_number`` values of every row whose ``REGION``
        is one of ``types``; empty when nothing matches.
    """
    in_region = station_data["REGION"].isin(types)
    # Building a set removes duplicate station numbers and lets callers
    # combine groups with the set operators | and &.
    return set(station_data.loc[in_region, "station_number"])
Other groups were added below for mapping purposes. These simply take all stations given their region
in the StationData.csv file. In the future they can be defined in groups as has been done previously by Deryn
for Victoria.
# Builds station_names (station_number -> station_name) and station_groups
# (group code -> station numbers) from the station CSV files.
# NOTE(review): this listing is a text extraction -- several lines below are
# cut off at the right margin and cannot be run as shown. Code lines are left
# untouched.
In [6]: station_data = pd.read_csv(’/work/autogfe/inputs/StationData.csv’)
station_names = station_data.set_index(’station_number’)[’station_name’].to_dict()
station_groups = {’V_Melb’: [86338]}
station_groups[’V_Melb_Gp’] = [86338, 87184, 87031, 87113, 86282, 86038, 86077, 86068, 86266, 86
# NOTE(review): vic_station_data is loaded and renamed here, but no later
# visible line reads it -- the filters below are all passed station_data.
# Confirm whether it was meant to be used.
vic_station_data = pd.read_csv(’/work/autogfe/inputs/VIC_OneMinuteRainfallDataStations.csv’)
vic_station_data.rename(columns={’Forecast District’:’forecast_district’, ’Site’: ’station_numbe
# NOTE(review): the next sixteen lines are four assignments whose columns were
# spilled onto separate lines by the extraction. Presumably each reads
#   station_groups['V_n'] = filter_stations_by_district(station_data, [...])
# with the targets, "=" signs, calls and district lists pairing up in order
# (so V_3 also gets "| set([86338])") -- TODO confirm against the notebook.
station_groups[’V_1’]
station_groups[’V_2’]
station_groups[’V_3’]
station_groups[’V_4’]
=
=
=
=
filter_stations_by_district(station_data,
filter_stations_by_district(station_data,
filter_stations_by_district(station_data,
filter_stations_by_district(station_data,
[’VIC_PW005’, ’VIC_PW006’])
[’VIC_PW001’, ’VIC_PW002’, ’VI
[’VIC_PW007’]) | set([86338])
[’VIC_PW009’])
# The district groups are combined with | (the right-hand side is cut off).
station_groups[’VIC’] = station_groups[’V_1’] | station_groups[’V_2’] | station_groups[’V_3’] |
vic = station_groups[’VIC’]
vic_coastal = station_groups[’V_1’] | station_groups[’V_3’] | station_groups[’V_4’]
# NOTE(review): the bare "2" below is presumably a page number from the
# extraction, not code.
2
# Category-based groups are intersected with vic (&) so that only stations in
# the VIC group remain.
station_groups[’V_Coast’] = (filter_stations_by_type(station_data, [’coast’]) & vic) | set([8633
station_groups[’V_Mtn’] = filter_stations_by_type(station_data, [’mountains1’]) & vic
vic_flat = filter_stations_by_type(station_data, [’mountains2’, ’flat_inland’]) & vic
# The "flat" stations are split into those in V_2 and those in the
# V_1/V_3/V_4 union (vic_coastal).
station_groups[’V_Flat_1’] = vic_flat & station_groups[’V_2’]
station_groups[’V_Flat_2’] = vic_flat & vic_coastal
# Other states, for mapping purposes. For each state, <state>_mask flags the
# rows of station_data whose REGION equals the state's code, and <state> is
# the list of station numbers on those rows.
nsw_mask = station_data['REGION'].isin(['NSW'])
nsw = list(station_data[nsw_mask].station_number.values)

qld_mask = station_data['REGION'].isin(['QLD'])
qld = list(station_data[qld_mask].station_number.values)

wa_mask = station_data['REGION'].isin(['WA'])
wa = list(station_data[wa_mask].station_number.values)

nt_mask = station_data['REGION'].isin(['NT'])
nt = list(station_data[nt_mask].station_number.values)

sa_mask = station_data['REGION'].isin(['SA'])
sa = list(station_data[sa_mask].station_number.values)

# Tasmanian stations are filed under the combined region code 'TAS/ANT'.
tas_mask = station_data['REGION'].isin(['TAS/ANT'])
tas = list(station_data[tas_mask].station_number.values)
2.2 Forecast Data
Sets the date range for forecast data retrieval.
# In [7]:
# Daily dates from 20 to 23 December 2015, both end dates included.
date_range_for_retrieval = pd.date_range('20151220', '20151223')
Imports the OCF forecast data
In [8]: %time ocf_dailypop_forecasts_vic = gfe.fcst.by_lead_hour(’DailyPoP’, ’Op OCF’, date_range_for_re
CPU times: user 19 s, sys: 1.23 s, total: 20.2 s
Wall time: 36.6 s
/home/students/midget/midget/dataviews/foundation.py:39: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing
df.dropna(inplace=True, subset=coords)
Imports the Official forecast data
In [9]: %time official_dailypop_forecasts_vic = gfe.fcst.by_lead_hour(’DailyPoP’, ’Op Official’, date_ra
2016-02-17 00:44:13,010:midget.dataviews.gfe.utils:WARNING: The following files are missing:
/work/autogfe/data/small/forecast/gridSamples Op Official 20151210 0000 QLDRO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 SARO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 WARO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151218 0000 WARO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151220 0000 QLDRO.csv
WARNING:midget.dataviews.gfe.utils:The following files are missing:
/work/autogfe/data/small/forecast/gridSamples Op Official 20151210 0000 QLDRO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 SARO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 WARO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151218 0000 WARO.csv
/work/autogfe/data/small/forecast/gridSamples Op Official 20151220 0000 QLDRO.csv
CPU times: user 28.6 s, sys: 1.71 s, total: 30.3 s
Wall time: 51.5 s
Imports Daily Observational data
In [10]: %time daily_prcp_obs_vic = obs.daily_15z_precip(date_range_for_retrieval, tolerance_mins=30)
CPU times: user 1.49 s, sys: 81.1 ms, total: 1.57 s
Wall time: 1.79 s
Imports Three Hourly Observational data
In [11]: %time threehour_prcp_obs_vic = obs.three_hourly_15z_precip(date_range_for_retrieval, tolerance_
CPU times: user 1.63 s, sys: 40.7 ms, total: 1.67 s
Wall time: 1.67 s
3 Mapping
3.1 Mapping Function
The mapping function below will plot data on a map for given station locations.
data array:
• Any array of data (in x-array form), assuming it is two dimensional (two coordinates), one of which is
station number. The other coordinate is used to create multiple plots, one plot for each value of the
coordinate.
stations of interest:
• A list/set of station numbers of interest to be plotted.
state view options:
• ‘vic’
• ‘nsw’
• ‘qld’
• ‘wa’
• ‘sa’
• ‘nt’
• ‘tas’
Note that only one state can be selected at present, so only stations within that state should be listed in
stations of interest.
plot type options:
• ‘precip’ - Colorbar is set by intervals 0, 0.2, 1, 2, 5, 10, 50, 100, 150, 200, and colorbar is labelled.
• ‘percent’ - Colorbar is set by zmin=0 and zmax=100, and is continuous.
• ‘rps’ - Colorbar is set by zmin=0 and zmax as being maximum of data array.
• ‘brier’ - Colorbar is set by zmin=0 and zmax=1
zmin and zmax are required if plot type is not given.
title str: The title of each plot is set out as “title str - (other coordinate name: other coordinate value)”,
where other coordinate name is the coordinate in the data array that is not station number, and
other coordinate value is the value of that coordinate chosen for that plot (e.g. a lead hour of 15). For example,
if title str = ‘OCF Forecast % Chance 0.2mm’, the other coordinate is lead hour, and the first two values of
lead hour are 15 and 39, the titles of the first two plots would be:
• “OCF Forecast % Chance 0.2mm - (lead hour: 15)”
• “OCF Forecast % Chance 0.2mm - (lead hour: 39)”
# mapping_func: plots station values from a two-coordinate xarray on a Basemap
# map of one state, drawing one figure per value of the coordinate that is not
# station_number.
# NOTE(review): this listing is a text extraction -- indentation is lost, long
# lines (including the signature) are cut off at the right margin, and the
# bare digits between statements are presumably page numbers. Code lines are
# left untouched.
In [12]: def mapping_func(data_array,stations_of_interest,state_view,title_str,zmin=None,zmax=None,plot_
’’’
The inputs for the function are as follows:
- data_array: 2D xarray with station_number as one coordinate of the two coordinates
- stations_of_interest: must be a LIST of station numbers
- title_str: title to be added before stating other coordinate (ie lead_hour, valid_start s
- zmin: minimum value of colour bar
- zmax: maximum value of colour bar
- plot_type: pre-defined plot types as string.
’’’
# State dictionary
# Each entry is read below by position: [0] is passed as llcrnrlon, [4] is the
# list of state codes to shade out, [5] the code(s) of the state being viewed,
# and [6] the path opened as the topography Dataset. Entries [1]-[3] are
# presumably the remaining map corners (their use is cut off) -- TODO confirm.
state_dict = {’vic’: [140.5,-39.5,150.5,-33.5,[’1’,’4’],[’2’],’/data/work/autogfe/database/
’nsw’: [140.5,-37.8,154.5,-28.0,[’2’,’3’,’4’],[’1’],’/data/work/autogfe/dat
’qld’: [137.4,-29.3,154.0,-9.8,[’1’,’4’,’7’],[’3’],’/data/work/autogfe/data
’wa’: [112.5,-35.3,129.4,-13.3,[’4’,’7’],[’5’],’/data/work/autogfe/database
’sa’: [128.4,-38.2,141.5,-25.5,[’1’,’2’,’3’,’5’,’7’],[’4’],’/data/work/auto
’nt’: [128.4,-26.3,138.4,-10.3,[’3’,’4’,’5’],[’7’],’/data/work/autogfe/data
’tas’: [143.5,-43.8,148.7,-39.5,[’2’],[’6’],’/data/work/autogfe/database/sp
}
# Ensures data_array is a 2D xarray
# NOTE(review): validation failures here and below are reported by returning
# an error string, not by raising, so a caller that ignores the return value
# sees no plot and no exception.
if len(data_array.coords) > 2:
return ’Error: Data array has more than two coordinates’
# Ensures either zmin and zmax or plot_type is given
# NOTE(review): the last two messages look swapped -- the branch taken when
# zmax is None reports a missing zmin, and vice versa. Also, comparisons with
# None are normally written "is None".
if plot_type == None and zmin == None and zmax == None:
return ’Error: zmin/zmax/plot_type not given’
elif plot_type == None and zmax == None:
return ’Error: missing zmin value’
elif plot_type == None and zmin == None:
return ’Error: missing zmax value’
5
# Obtain station spatial coordinates for stations in data_array
# station_data is not a parameter; it is read from the enclosing (notebook)
# scope.
mask = station_data[’station_number’].isin(stations_of_interest) # grabs the stations of in
mask2 = station_data[’station_number’].isin(data_array.station_number.values) # finds the i
temp_station_data = station_data[mask] # returns relevant station data to the forecast data
# NOTE(review): mask2 was built over all of station_data but is applied to the
# already-filtered temp_station_data; this presumably relies on pandas
# aligning the boolean Series by index -- confirm, or index once with
# station_data[mask & mask2].
required_station_data = temp_station_data[mask2]
# Select valid stations in data_array
data_array_removed_not_rounded = data_array.sel(station_number=required_station_data.statio
data_array_removed = data_array_removed_not_rounded.round(decimals=2) # rounds to two decim
# Find the required number of plots and which dimension number they relate to
# rel_position is the axis index of the non-station dimension; one plot is
# drawn per value along it.
if str(data_array_removed.dims[0]) == ’station_number’:
other_coord = data_array_removed.dims[1]
rel_position = 1
else:
other_coord = data_array_removed.dims[0]
rel_position = 0
num_plots = len(data_array_removed.coords[other_coord])
# Set Basemap environment, grab state topography
m = Basemap(projection=’merc’, resolution = ’h’, llcrnrlon=state_dict[state_view][0], llcrn
outside_states = state_dict[state_view][4]
current_state = state_dict[state_view][5]
gfe_topo = Dataset(state_dict[state_view][6])
m.readshapefile(’/data/work/autogfe/database/spatial/ASGC_data/STE11aAust’, ’StateShapes’,
# Import topography variables
elev_lat_grid = gfe_topo.variables[’latitude’][:]
elev_lon_grid = gfe_topo.variables[’longitude’][:]
elevation_grid = gfe_topo.variables[’Topo_SFC’][0,:,:]
# Set up figure loop
for plotnum in range(0, num_plots):
# Set up sub-plot environment
fig = plt.figure(plotnum+1)
ax = fig.add_subplot(1,1,1)
coord_values = data_array_removed.coords[other_coord].values
ax.set_title(title_str + ’ - (’ + other_coord + ’: ’ + str(coord_values[plotnum]) + ’
# Patches need to be recollected every loop due to being unable to be used on multiple
# Create a patch collection for shading outside states
patches_outside
= []
6
for info, shape in zip(m.StateShapes_info, m.StateShapes):
if info[’STATE_CODE’] in outside_states: # change these numbers to change which sta
patches_outside.append( Polygon(np.array(shape), True) )
outside_shading = PatchCollection(patches_outside, facecolor=’white’, edgecolor=’black’
# Create a patch collection for showing state borders (this is required due to differen
patches_borders
= []
for info, shape in zip(m.StateShapes_info, m.StateShapes):
if info[’STATE_CODE’] in current_state: # this state number should be the state you
patches_borders.append( Polygon(np.array(shape), True) )
current_state_borders = PatchCollection(patches_borders, facecolor=’none’, edgecolor=’b
# Topography plotting
cmap = plt.get_cmap(’OrRd’)
cmap.set_under(color=’#e5f7ff’)
elev_x,elev_y=m(elev_lon_grid,elev_lat_grid)
m.contour(elev_x,elev_y,elevation_grid,np.arange(200.,2400.,200.),colors=’k’,linewidths
m.contourf(elev_x,elev_y,elevation_grid,np.arange(-50.,2400.,50.),cmap=cmap,alpha=1,vmi
m.contour(elev_x,elev_y,elevation_grid,np.arange(500.,501.,2.),colors=’b’,linewidths=1,
# Set plot boundaries and shading
m.drawmapboundary()
ax.add_collection(outside_shading)
ax.add_collection(current_state_borders)
# Convert lat/lon points to a linear grid
# NOTE(review): the marker positions come from required_station_data while the
# values come from data_array_removed; this assumes both list the stations in
# the same order -- TODO confirm.
lats = required_station_data[’LATITUDE’].values
lons = required_station_data[’LONGITUDE’].values
x,y = m(lons, lats)
# Selecting data to view
if rel_position == 1:
data_plot = data_array_removed[:,plotnum]
else:
data_plot = data_array_removed[plotnum,:]
# Plot data
if plot_type == ’precip’:
# Discrete colour boundaries in mm; values above the top bound use the
# "over" colour set below.
bounds = np.array([0,0.2, 1, 2, 5, 10, 50, 100, 150, 200])
norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)#LogNorm(vmin=zmin, vmax=
rain_cmap = plt.cm.get_cmap("terrain_r")
rain_cmap.set_over("purple")
pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = rain_cmap,zorder=2,alpha=1, n
cbar = m.colorbar(pt,extend=’max’)
cbar.ax.set_ylabel(’Precipitation (mm)’)
elif plot_type == ’rps’:
pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.summer_r,zo
7
m.colorbar(pt)
elif plot_type == ’percent’:
pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.gist_earth_
m.colorbar(pt)
elif plot_type == ’brier’:
pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.gist_earth_
m.colorbar(pt)
else:
pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.gist_earth_
m.colorbar(pt)
# Write each station's value as text at its marker position.
labels = data_plot.values
for label, xpt, ypt in zip(labels, x, y):
plt.text(xpt, ypt, label, horizontalalignment=’center’, verticalalignment=’center’,
plt.show()
# Returns the matplotlib.pyplot module itself, not a figure object.
return plt
3.2 Mapping function test area
The first line converts the 3D array to 2D by selecting index 0 along the second dimension and dropping the
valid_start coordinate. The second line calls the mapping function.
# NOTE(review): `test` is assigned here but is not passed to the call below,
# which plots daily_prcp_obs_vic instead -- presumably a leftover; confirm.
In [14]: test=ocf_dailypop_forecasts_vic[:,0,:].reset_coords(names=’valid_start’,drop=True)
# Plots the daily observations for the `vic` station group on the 'vic' map
# view using the 'precip' plot type.
mapping_func(daily_prcp_obs_vic,vic,’vic’,’Observed rainfall’,plot_type=’precip’)
8
9
Out[14]: <module ’matplotlib.pyplot’ from ’/home/students/miniconda2/lib/python2.7/site-packages/matplot
In [ ]:
10
Download