Comparing Automated Forecasts of Rainfall Probabilities to the BoM’s Published Forecasts Current February 17, 2016 1 Library Import In [1]: import pandas as pd import matplotlib.pyplot as plt import numpy as np from mpl_toolkits.basemap import Basemap from midget.dataviews.adam import obs from xarray import DataArray from midget.dataviews import gfe import datetime as datetime from netCDF4 import Dataset from pylab import rcParams from matplotlib.patches import Polygon from matplotlib.collections import PatchCollection import matplotlib.colors as colors %matplotlib inline rcParams[’figure.figsize’] = (16,12) 2 2.1 Data Import Station Data Code taken from 20160120 - Precip-type forecasts and observations retrieved.ipynb In [2]: # List of Groups of Stations in a dictionary Numbers station_group_names = {’VIC’: ’Victoria’, ’V_Melb’: ’Melbourne (Olympic Park)’, ’V_Melb_Gp’: ’Melbourne Area’, ’V_1’: ’Gippsland (VIC)’, ’V_2’: ’Inland(VIC)’, ’V_3’: ’Central (VIC)’, ’V_4’: ’Southwest (VIC)’, ’V_Coast’: ’Coastal Stations to 5km (VIC)’, ’V_Mtn’: ’Mountainous Stations (VIC)’, ’V_Flat_1’: ’Other Stations, Inland (VIC)’, # Other means not coastal or m ’V_Flat_2’: ’Other Stations, Coastal Districts (VIC)’, } In [3]: def filter_stations_by_district(station_data, districts): ’’’ station_data: assumed to be a pandas data frame with a station_number column and a forecast_district column. districts: a list of forecast_district codes of interest. output: the set of station numbers whose forecast_district is in districts. ’’’ station_list = station_data.loc[station_data["forecast_district"].isin(districts)] station_list = station_list.station_number station_list = set([val for val in station_list]) return station_list In [4]: def filter_stations_by_type(station_data, types): ’’’ station_data: assumed to be a pandas data frame with a station_number column and a Category column. types: a list of Category values of interest. output: the set of station numbers whose Category is in types. ’’’ station_list = 
station_data.loc[station_data["Category"].isin(types)] station_list = station_list.station_number station_list = set([val for val in station_list]) return station_list In [5]: def filter_stations_by_region(station_data, types): ’’’ station_data: assumed to be a pandas data frame with a station_number column and a REGION column. types: a list of REGION values of interest. output: the set of station numbers whose REGION is in types. ’’’ station_list = station_data.loc[station_data["REGION"].isin(types)] station_list = station_list.station_number station_list = set([val for val in station_list]) return station_list Other groups were added below for mapping purposes. These simply take all stations given their region in the StationData.csv file. In the future they can be defined in groups as has been done previously by Deryn for Victoria. In [6]: station_data = pd.read_csv(’/work/autogfe/inputs/StationData.csv’) station_names = station_data.set_index(’station_number’)[’station_name’].to_dict() station_groups = {’V_Melb’: [86338]} station_groups[’V_Melb_Gp’] = [86338, 87184, 87031, 87113, 86282, 86038, 86077, 86068, 86266, 86 vic_station_data = pd.read_csv(’/work/autogfe/inputs/VIC_OneMinuteRainfallDataStations.csv’) vic_station_data.rename(columns={’Forecast District’:’forecast_district’, ’Site’: ’station_numbe station_groups[’V_1’] station_groups[’V_2’] station_groups[’V_3’] station_groups[’V_4’] = = = = filter_stations_by_district(station_data, filter_stations_by_district(station_data, filter_stations_by_district(station_data, filter_stations_by_district(station_data, [’VIC_PW005’, ’VIC_PW006’]) [’VIC_PW001’, ’VIC_PW002’, ’VI [’VIC_PW007’]) | set([86338]) [’VIC_PW009’]) station_groups[’VIC’] = station_groups[’V_1’] | station_groups[’V_2’] | station_groups[’V_3’] | vic = station_groups[’VIC’] vic_coastal = station_groups[’V_1’] | station_groups[’V_3’] | station_groups[’V_4’] 2 station_groups[’V_Coast’] = (filter_stations_by_type(station_data, [’coast’]) & vic) | set([8633 
station_groups[’V_Mtn’] = filter_stations_by_type(station_data, [’mountains1’]) & vic vic_flat = filter_stations_by_type(station_data, [’mountains2’, ’flat_inland’]) & vic station_groups[’V_Flat_1’] = vic_flat & station_groups[’V_2’] station_groups[’V_Flat_2’] = vic_flat & vic_coastal # other states for mapping purposes nsw_mask = station_data[’REGION’].isin([’NSW’]) nsw = list(station_data[nsw_mask].station_number.values) qld_mask = station_data[’REGION’].isin([’QLD’]) qld = list(station_data[qld_mask].station_number.values) wa_mask = station_data[’REGION’].isin([’WA’]) wa = list(station_data[wa_mask].station_number.values) nt_mask = station_data[’REGION’].isin([’NT’]) nt = list(station_data[nt_mask].station_number.values) sa_mask = station_data[’REGION’].isin([’SA’]) sa = list(station_data[sa_mask].station_number.values) tas_mask = station_data[’REGION’].isin([’TAS/ANT’]) tas = list(station_data[tas_mask].station_number.values) 2.2 Forecast Data Sets the date range for forecast data retrieval. 
In [7]: date_range_for_retrieval = pd.date_range(’20151220’, ’20151223’) Imports the OCF forecast data In [8]: %time ocf_dailypop_forecasts_vic = gfe.fcst.by_lead_hour(’DailyPoP’, ’Op OCF’, date_range_for_re CPU times: user 19 s, sys: 1.23 s, total: 20.2 s Wall time: 36.6 s /home/students/midget/midget/dataviews/foundation.py:39: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing df.dropna(inplace=True, subset=coords) Imports the Official forecast data In [9]: %time official_dailypop_forecasts_vic = gfe.fcst.by_lead_hour(’DailyPoP’, ’Op Official’, date_ra 2016-02-17 00:44:13,010:midget.dataviews.gfe.utils:WARNING: The following files are missing: /work/autogfe/data/small/forecast/gridSamples Op Official 20151210 0000 QLDRO.csv /work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 SARO.csv /work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 WARO.csv /work/autogfe/data/small/forecast/gridSamples Op Official 20151218 0000 WARO.csv 3 /work/autogfe/data/small/forecast/gridSamples Op Official 20151220 0000 WARNING:midget.dataviews.gfe.utils:The following files are missing: /work/autogfe/data/small/forecast/gridSamples Op Official 20151210 0000 /work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 /work/autogfe/data/small/forecast/gridSamples Op Official 20151217 0000 /work/autogfe/data/small/forecast/gridSamples Op Official 20151218 0000 /work/autogfe/data/small/forecast/gridSamples Op Official 20151220 0000 QLDRO.csv QLDRO.csv SARO.csv WARO.csv WARO.csv QLDRO.csv CPU times: user 28.6 s, sys: 1.71 s, total: 30.3 s Wall time: 51.5 s Imports Daily Observational data In [10]: %time daily_prcp_obs_vic = obs.daily_15z_precip(date_range_for_retrieval, tolerance_mins=30) CPU times: user 1.49 s, sys: 81.1 ms, total: 1.57 s Wall time: 1.79 s Imports Three Hourly Observational data In 
[11]: %time threehour_prcp_obs_vic = obs.three_hourly_15z_precip(date_range_for_retrieval, tolerance_ CPU times: user 1.63 s, sys: 40.7 ms, total: 1.67 s Wall time: 1.67 s 3 Mapping 3.1 Mapping Function The mapping function below plots data on a map at the given station locations. data_array: • Any array of data (in xarray form), assumed to be two-dimensional (two coordinates), one of which is station_number. The other coordinate is used to create multiple plots, one plot for each value of that coordinate. stations_of_interest: • A list/set of station numbers of interest to be plotted. state_view options: • ‘vic’ • ‘nsw’ • ‘qld’ • ‘wa’ • ‘sa’ • ‘nt’ • ‘tas’ Note that only one state can be selected at present, so only stations within this state should be listed in stations_of_interest. plot_type options: • ‘precip’ - Colorbar is set by intervals 0, 0.2, 1, 2, 5, 10, 50, 100, 150, 200, and colorbar is labelled. • ‘percent’ - Colorbar is set by zmin=0 and zmax=100, and is continuous. • ‘rps’ - Colorbar is set by zmin=0 and zmax as being maximum of data array. • ‘brier’ - Colorbar is set by zmin=0 and zmax=1. zmin and zmax are required if plot_type is not given. title_str: The title of each plot is laid out as “title_str - (other coordinate name: other coordinate value)”, where the other coordinate name is the coordinate of data_array that is not station_number, and the other coordinate value is the value of that coordinate for that plot (e.g. a lead hour of 15). For example, if title_str = ‘OCF Forecast % Chance 0.2mm’ and the other coordinate is lead hour, the titles of the first two plots, given that the first two values of lead hour are 15 and 39, would be: 
- “OCF Forecast % Chance 0.2mm - (lead hour: 15)” - “OCF Forecast % Chance 0.2mm - (lead hour: 39)” In [12]: def mapping_func(data_array,stations_of_interest,state_view,title_str,zmin=None,zmax=None,plot_ ’’’ The inputs for the function are as follows: - data_array: 2D xarray with station_number as one coordinate of the two coordinates - stations_of_interest: must be a LIST of station numbers - title_str: title to be added before stating other coordinate (ie lead_hour, valid_start s - zmin: minimum value of colour bar - zmax: maximum value of colour bar - plot_type: pre-defined plot types as string. ’’’ # State dictionary state_dict = {’vic’: [140.5,-39.5,150.5,-33.5,[’1’,’4’],[’2’],’/data/work/autogfe/database/ ’nsw’: [140.5,-37.8,154.5,-28.0,[’2’,’3’,’4’],[’1’],’/data/work/autogfe/dat ’qld’: [137.4,-29.3,154.0,-9.8,[’1’,’4’,’7’],[’3’],’/data/work/autogfe/data ’wa’: [112.5,-35.3,129.4,-13.3,[’4’,’7’],[’5’],’/data/work/autogfe/database ’sa’: [128.4,-38.2,141.5,-25.5,[’1’,’2’,’3’,’5’,’7’],[’4’],’/data/work/auto ’nt’: [128.4,-26.3,138.4,-10.3,[’3’,’4’,’5’],[’7’],’/data/work/autogfe/data ’tas’: [143.5,-43.8,148.7,-39.5,[’2’],[’6’],’/data/work/autogfe/database/sp } # Ensures data_array is a 2D xarray if len(data_array.coords) > 2: return ’Error: Data array has more than two coordinates’ # Ensures either zmin and zmax or plot_type is given if plot_type == None and zmin == None and zmax == None: return ’Error: zmin/zmax/plot_type not given’ elif plot_type == None and zmax == None: return ’Error: missing zmin value’ elif plot_type == None and zmin == None: return ’Error: missing zmax value’ 5 # Obtain station spatial coordinates for stations in data_array mask = station_data[’station_number’].isin(stations_of_interest) # grabs the stations of in mask2 = station_data[’station_number’].isin(data_array.station_number.values) # finds the i temp_station_data = station_data[mask] # returns relevant station data to the forecast data required_station_data = 
temp_station_data[mask2] # Select valid stations in data_array data_array_removed_not_rounded = data_array.sel(station_number=required_station_data.statio data_array_removed = data_array_removed_not_rounded.round(decimals=2) # rounds to two decim # Find the required number of plots and which dimension number they relate to if str(data_array_removed.dims[0]) == ’station_number’: other_coord = data_array_removed.dims[1] rel_position = 1 else: other_coord = data_array_removed.dims[0] rel_position = 0 num_plots = len(data_array_removed.coords[other_coord]) # Set Basemap environment, grab state topography m = Basemap(projection=’merc’, resolution = ’h’, llcrnrlon=state_dict[state_view][0], llcrn outside_states = state_dict[state_view][4] current_state = state_dict[state_view][5] gfe_topo = Dataset(state_dict[state_view][6]) m.readshapefile(’/data/work/autogfe/database/spatial/ASGC_data/STE11aAust’, ’StateShapes’, # Import topography variables elev_lat_grid = gfe_topo.variables[’latitude’][:] elev_lon_grid = gfe_topo.variables[’longitude’][:] elevation_grid = gfe_topo.variables[’Topo_SFC’][0,:,:] # Set up figure loop for plotnum in range(0, num_plots): # Set up sub-plot environment fig = plt.figure(plotnum+1) ax = fig.add_subplot(1,1,1) coord_values = data_array_removed.coords[other_coord].values ax.set_title(title_str + ’ - (’ + other_coord + ’: ’ + str(coord_values[plotnum]) + ’ # Patches need to be recollected every loop due to being unable to be used on multiple # Create a patch collection for shading outside states patches_outside = [] 6 for info, shape in zip(m.StateShapes_info, m.StateShapes): if info[’STATE_CODE’] in outside_states: # change these numbers to change which sta patches_outside.append( Polygon(np.array(shape), True) ) outside_shading = PatchCollection(patches_outside, facecolor=’white’, edgecolor=’black’ # Create a patch collection for showing state borders (this is required due to differen patches_borders = [] for info, shape in 
zip(m.StateShapes_info, m.StateShapes): if info[’STATE_CODE’] in current_state: # this state number should be the state you patches_borders.append( Polygon(np.array(shape), True) ) current_state_borders = PatchCollection(patches_borders, facecolor=’none’, edgecolor=’b # Topography plotting cmap = plt.get_cmap(’OrRd’) cmap.set_under(color=’#e5f7ff’) elev_x,elev_y=m(elev_lon_grid,elev_lat_grid) m.contour(elev_x,elev_y,elevation_grid,np.arange(200.,2400.,200.),colors=’k’,linewidths m.contourf(elev_x,elev_y,elevation_grid,np.arange(-50.,2400.,50.),cmap=cmap,alpha=1,vmi m.contour(elev_x,elev_y,elevation_grid,np.arange(500.,501.,2.),colors=’b’,linewidths=1, # Set plot boundaries and shading m.drawmapboundary() ax.add_collection(outside_shading) ax.add_collection(current_state_borders) # Convert lat/lon points to a linear grid lats = required_station_data[’LATITUDE’].values lons = required_station_data[’LONGITUDE’].values x,y = m(lons, lats) # Selecting data to view if rel_position == 1: data_plot = data_array_removed[:,plotnum] else: data_plot = data_array_removed[plotnum,:] # Plot data if plot_type == ’precip’: bounds = np.array([0,0.2, 1, 2, 5, 10, 50, 100, 150, 200]) norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)#LogNorm(vmin=zmin, vmax= rain_cmap = plt.cm.get_cmap("terrain_r") rain_cmap.set_over("purple") pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = rain_cmap,zorder=2,alpha=1, n cbar = m.colorbar(pt,extend=’max’) cbar.ax.set_ylabel(’Precipitation (mm)’) elif plot_type == ’rps’: pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.summer_r,zo 7 m.colorbar(pt) elif plot_type == ’percent’: pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.gist_earth_ m.colorbar(pt) elif plot_type == ’brier’: pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.gist_earth_ m.colorbar(pt) else: pt=m.scatter(x,y,c=data_plot,marker=’s’,s=350, cmap = plt.matplotlib.cm.gist_earth_ m.colorbar(pt) labels = 
data_plot.values for label, xpt, ypt in zip(labels, x, y): plt.text(xpt, ypt, label, horizontalalignment=’center’, verticalalignment=’center’, plt.show() return plt 3.2 Mapping function test area The first line reduces the 3D forecast array to 2D by taking index 0 along its second dimension and dropping the valid_start coordinate. The second line calls the mapping function, here on the daily observed rainfall. In [14]: test=ocf_dailypop_forecasts_vic[:,0,:].reset_coords(names=’valid_start’,drop=True) mapping_func(daily_prcp_obs_vic,vic,’vic’,’Observed rainfall’,plot_type=’precip’) 8 9 Out[14]: <module ’matplotlib.pyplot’ from ’/home/students/miniconda2/lib/python2.7/site-packages/matplot In [ ]: 10