dukelabnew.doc

advertisement
Fuentes’ LAB NOTES:
www4.stat.ncsu.edu/~fuentes/dukelab
DATASETS NEEDED:
coalash.dt (Coal Ash dataset)
www4.stat.ncsu.edu/~fuentes/coalash.txt
davis.txt (topographic heights)
www4.stat.ncsu.edu/~fuentes/davis.txt
ozone.txt (ozone values)
www4.stat.ncsu.edu/~fuentes/ozone.txt
climate.txt (climate data frame)
www4.stat.ncsu.edu/~fuentes/climate.txt
Name the 3 files coalash.txt, davis.txt and ozone.txt in your working directory.
software R with library geoR, fields and akima
These packages are not compatible with each other
Use command detach(package:fields) to detach packages.
This lab has 2 parts:
Part I: spatial estimation, and
Part II: Bayesian spatial estimation and prediction
PART I
Estimating the spatial structure
##EXERCISE 1: How to plot the data and get empirical semivariograms
##EXERCISE 2: WNLS VARIOGRAM ESTIMATOR
##EXERCISE 3: REML
##EXERCISE 4: Profile likelihood
########################################################################
# Using geoR library: it must be attached before plot.geodata/variog
# (used below) can be found.
library(geoR)
# download your data using read.table
# to learn more about read.table type
# > ? read.table
coal.ash <- read.table("coalash.txt")
# Matrix copy of the coal-ash table; the calls below take columns 2:3 as
# coordinates and column 4 as the data values.
coal.m <- as.matrix(coal.ash)
davis.txt <- read.table("davis.txt")
# Keep only the first three columns; the calls below take columns 1:2 as
# coordinates and column 3 as the data values.
davis.m <- as.matrix(davis.txt)[, 1:3]
##EXERCISE 1: How to plot the data and get binned semivariograms
#TO PLOT THE DATA
X11()
plot.geodata(coords = coal.m[, 2:3], data = coal.m[, 4])
# The plots returned are:
## - A plot with data locations. Symbols, their sizes (and colors)
##   separate data from different quartiles as follows:
###     (circles)   : 1st quartile
###     (triangles) : 2nd
###     (plus)      : 3rd
###     (crosses)   : 4th
## - A plot with data-values against coordinate X
## - A plot with data-values against coordinate Y
#TO PRODUCE A VARIOGRAM CLOUD
# cloud1 uses the default estimator, cloud2 the 'modulus' estimator; both
# ask for the un-binned cloud via option = "cloud".
cloud1 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4],
                 option = "cloud")
cloud2 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4],
                 option = "cloud", estimator.type = "modulus")
par(mfrow = c(1, 2))
plot(cloud1)
plot(cloud2)
# Binned variograms. bin.cloud = TRUE keeps the cloud values of each bin so
# that plot(..., bin.cloud = TRUE) can display them.
bin1 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4],
               bin.cloud = TRUE, uvec = seq(0, 10, length.out = 11))
## uvec : n-element vector of values to define the binning;
## the values of uvec define the centers of the bins
bin2 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4],
               bin.cloud = TRUE, estimator.type = "modulus",
               uvec = seq(0, 10, length.out = 11))
par(mfrow = c(1, 3))
plot(bin1)
plot(bin1, bin.cloud = TRUE)
plot(bin2)
#with Davis data:
bin.davis <- variog(coords = davis.m[, 1:2], data = davis.m[, 3],
                    bin.cloud = TRUE, uvec = seq(0, 4, length.out = 11))
plot(bin.davis)
#############################################################################
##EXERCISE 2: WNLS ESTIMATOR
# Empirical variogram with the default binning, used as input to the fit.
bin3 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4])
# ini.cov.pars gives the starting values (partial sill, range). With
# fix.nugget = FALSE the nugget is estimated too, starting from nugget = 0.
wls <- variofit(bin3, ini.cov.pars = c(0.5, 3), fix.nugget = FALSE,
                nugget = 0, cov.model = "exponential")
summary(wls)
#output of wls (printed after the notes below)
# THE WEIGHTS ARE OBTAINED FROM AN ITERATIVE, NONLINEAR ESTIMATION
# ROUTINE, using 'nlminb',
# nlminb is based on the Fortran functions dmnfb, dmngb, and
# dmnhb (Gay (1983; 1984), A T & T (1984)) from NETLIB
# (Dongarra and Grosse (1987)).
#
# ini: initial values of the parameters: partial sill and range
#
# covariance model, you can choose: "exponential", "matern", "gaussian",
# "spherical", "wave", "powered.exponential"
#
# if you choose the matern then the smoothing parameter is kappa
# here kappa is .5 (the default)
#
# You can say fix.nugget=T then the nugget will be 0 and
# you do not need to initialize the nugget, only the partial sill
# and range, therefore ini=c(.5,3) where .5 is the initial value
# for partial sill and 3 is the initial value for range.
#
# > wls$nugget
# [1] 1.013864
# > wls$cov.pars
# [1]  1.193859 10.445359
########################################################################
##EXERCISE 3: REML
#TO OBTAIN THE REML AND ML ESTIMATORS
#use likfit
# ini.cov.pars : initial values for the partial sill and range
# kappa        : power for the powered exponential model,
#                smoothing parameter for the Matern model
# lik.method   : "ML" or "REML"
# trend        : "cte" is a constant mean; you can also choose a linear
#                trend by saying trend=1 or quadratic trend=2
#                NOTE(review): current geoR releases appear to spell these
#                "1st" / "2nd" -- confirm against ?trend.spatial.
# Isotropic fit, nugget held fixed (at its default value).
coal.ml <- likfit(coords = coal.m[, 2:3], data = coal.m[, 4],
                  fix.nugget = TRUE, ini.cov.pars = c(0.5, 0.5), kappa = 0.5,
                  trend = "cte", lik.method = "ML",
                  cov.model = "powered.exponential")
# Same model, but fix.psiA/fix.psiR = FALSE so the anisotropy angle and
# ratio are estimated as well, starting from psiA = 0 and psiR = 1.
coalani.ml <- likfit(coords = coal.m[, 2:3], data = coal.m[, 4],
                     fix.nugget = TRUE, ini.cov.pars = c(0.5, 0.5),
                     kappa = 0.5,
                     fix.psiA = FALSE, psiA = 0, fix.psiR = FALSE, psiR = 1,
                     trend = "cte", lik.method = "ML",
                     cov.model = "powered.exponential")
summary(coal.ml)
#output:
# covariance model: powered.exponential with kappa = 0.5
#   nugget     sill     range
#        0 1.564391 0.6769796
# REML
# covariance model: exponential
#   nugget     sill     range
#        0 1.599048 0.8074612
# Refit with the exponential covariance model (overwrites coal.ml).
coal.ml <- likfit(coords = coal.m[, 2:3], data = coal.m[, 4],
                  fix.nugget = TRUE, ini.cov.pars = c(0.5, 0.5),
                  trend = "cte", lik.method = "ML",
                  cov.model = "exponential")
# ML:
# covariance model: exponential
#   nugget     sill     range
#        0 1.576983 0.7874607
# REML
# covariance model: gaussian
#   nugget     sill     range
#        0 1.567491 0.7852996
summary(coalani.ml)
# Parameters of the mean component (trend):
#   beta
# 9.7224
# Parameters of the spatial component:
#   correlation function: powered.exponential
#   (estimated) variance parameter sigmasq (partial sill) = 1.504
#   (estimated) cor. fct. parameter phi (range parameter) = 0.5439
#   (fixed) extra parameter kappa = 0.5
#   anisotropy parameters:
#     (estimated) anisotropy angle = 0.6617 ( 38 degrees )
#     (estimated) anisotropy ratio = 2.020
############################################################################
##EXERCISE 4: PROFILE LIKELIHOOD
# profile log-likelihood for sill and range, here nugget is fixed
# first we fit the model whose likelihood is going to be profiled:
coal.ml <- likfit(coords = coal.m[, 2:3], data = coal.m[, 4],
                  fix.nugget = TRUE, ini.cov.pars = c(0.5, 0.5),
                  trend = "cte", lik.method = "ML",
                  cov.model = "exponential")
# now we get the profile likelihood, evaluated on a grid of 5 sill values
# between 0.1 and 5:
coal.prof <- proflik(coal.ml,
                     coords = coal.m[, 2:3], data = coal.m[, 4],
                     sill.values = seq(0.1, 5, length.out = 5))
#this takes a couple of minutes
# to plot the profile (the plot generic dispatches on the proflik result):
plot(coal.prof)
###################################
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PART II
Bayesian Spatial estimation and prediction:
##EXERCISE 1: Bayesian Estimation of covariance parameters
##EXERCISE 2: Moving neighborhood kriging
##EXERCISE 3: ANISOTROPY
##EXERCISE 4: Maps and Images
########################################################################
# Read both data sets again with read.table
# (type `? read.table` at the prompt to learn more about it).
davis.txt <- read.table(file = "davis.txt")
coal.ash <- read.table(file = "coalash.txt")
# Matrix copies of the two tables, as used by the calls further down.
davis.m <- as.matrix(x = davis.txt)
coal.m <- as.matrix(x = coal.ash)
###################################
##EXERCISE 1: Estimating model parameters
###A. Bayesian estimates of model parameters:
#Function: krige.bayes
# You obtain posteriors for the trend, range, partial sill and nugget
# Recommended (default) priors:
# uniform for beta, 1/sigma^2 for the partial sill
# and discrete uniform for the range and nugget.
#
# aniso.pars are the parameters for anisotropy: stretching and rotation angle.
#
# the default is with nugget 0, but you
# can also get a posterior for the nugget when you give a prior to the nugget,
# by saying tausq.rel.prior="uniform" inside prior.control()
#
#Additional information:
#---------------------
# This function computes (Bayesian) estimates of a spatial linear
# model and performs Bayesian and/or kriging prediction in a set of
# locations specified by the user.
# Priors options for mean and/or covariance parameters
# coords    : data coordinates (vector for 1d or matrix for 2d data)
# data      : vector with data values
# locations : coordinates of points to be estimated (vector for 1d or
#             matrix for 2d data). If locations='no' only model parameter
#             estimates are returned if the full bayesian model is considered.
# trend.d   : trend data in data locations. Default is constant trend.
#             The options '1st' or '2nd' build a first or second degree
#             polynomial.
# trend.l   : trend data in locations to be estimated. Default
#             is constant trend. The options '1st' or '2nd' build a first or
#             second degree polynomial.
#
#Example:
# Bayesian fit with a discrete prior for the range (phi) on 21 equally
# spaced support points between 0 and 3.
coal.by1 <- krige.bayes(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  prior = prior.control(phi.discrete = seq(0, 3, length.out = 21))
)
#nugget is fixed to zero or any other value (here 0):
#
# see the histograms of the posteriors
# for the mean, the partial sill and the range:
X11()
par(mfrow = c(1, 3))
hist(coal.by1$posterior$sample$beta)
hist(coal.by1$posterior$sample$sigmasq)
hist(coal.by1$posterior$sample$phi)
# to see the call that produced the object type:
coal.by1$call
#posterior for MEAN
coal.by1$posterior$beta$summary
#     mean   median mode.cond
# 9.740055 9.740496  9.758056
#posterior for partial sill
coal.by1$posterior$sigmasq
#     mean   median mode.cond
# 9.740055 9.740496  9.758056
# NOTE(review): the figures above repeat the beta summary exactly; they look
# like a copy-paste slip in the notes -- re-run to get the sigmasq values.
#posterior for range
coal.by1$posterior$phi
#      mean    median      mode
# 0.8645331 0.7894737 0.7894737
###B. Bayesian prediction:
#USE ksline for prediction, this function is still very slow,
#though it is being updated.
# NOTE(review): the call below uses krige.bayes (with locations=), not ksline.
#locations where we want Bayesian prediction (one row per location):
loci <- matrix(c(2, 3.5, 4, 5.5),
               ncol = 2, byrow = TRUE)
loci
#      [,1] [,2]
# [1,]    2  3.5
# [2,]    4  5.5
# this is the same as before for krige.bayes, but now we
# specify where you want to predict, by using the locations argument:
davis.bpred <- krige.bayes(
  coords = davis.m[, 1:2], data = davis.m[, 3],
  locations = loci,
  prior = prior.control(phi.discrete = seq(0, 3, length.out = 21))
)
davis.bpred$predictive$simulations
davis.bpred$predictive$mean
davis.bpred$predictive$variance
# 803.0342 735.7671
# 369.3989 461.2960
#Bayesian predictive distributions (one histogram per row of simulations):
par(mfrow = c(2, 1))
hist(davis.bpred$predictive$simulations[1, ])
hist(davis.bpred$predictive$simulations[2, ])
#
# If you want a nugget prior, instead of nugget=0,
# say nugget= c(1,2,5,6)
# or any other vector with the values of the discrete uniform prior
# of nugget/partial sill. Example:
# Bayesian fit with a discrete uniform prior on the relative nugget
# (6 support points in [0, 0.5]) and on the range (25 points in [0, 3]).
coal.b2 <- krige.bayes(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  prior = prior.control(
    tausq.rel.prior = "uniform",
    tausq.rel.discrete = seq(0, 0.5, length.out = 6),
    phi.discrete = seq(0, 3, length.out = 25)
  )
)
X11()
# One histogram per sampled parameter.
par(mfrow = c(1, 4))
hist(coal.b2$posterior$sample$beta)
hist(coal.b2$posterior$sample$sigmasq)
hist(coal.b2$posterior$sample$phi)
hist(coal.b2$posterior$sample$tausq.rel)
#Notice that here tausq.rel means relative nugget (nugget divided by the
# partial sill). So the argument is defining a discrete prior for the relative
# nugget with support points at 0, 10%, 20%, ... 50%
############################################################
##EXERCISE 2: Moving neighborhood kriging
###"Kriging performed in global neighborhood":
#   cov.pars : covariance parameters vector (partial sill,range)
#   m0       : defines the type of kriging:
#              'sk': simple kriging (no trend)
#              'ok': ordinary kriging (constant trend)
#              'kt': kriging with a trend model(universal)
#   kappa    : the smoothing parameter for the Matern or
#              powered exponential covariance function
# Ordinary kriging (m0 = "ok") at the rows of loci, using every observation.
# loci is the matrix of prediction locations defined earlier in the notes.
coal.k <- ksline(coords = coal.m[, 2:3], data = coal.m[, 4],
                 locations = loci,
                 cov.pars = c(3, 1), nugget = 0,
                 cov.model = "matern",
                 kappa = 0.5, m0 = "ok")
##coal.k
coal.k$predict
# [1] 10.07305 11.22320
coal.k$krige.var
# [1] 2.684456 1.349229
coal.k$beta
#          [,1]
# [1,] 9.752618
coal.k$message
# [1] "Kriging performed in global neighborhood"
##Universal Kriging:
#
# the value of trend here is 2 which means we have a polynomial
# of degree 2 for a two dimensional problem, therefore
# we need to estimate 6 parameters (1, x, y, x^2, y^2, x*y),
# as the coefficient table below shows
coal.uk <- ksline(coords = coal.m[, 2:3], data = coal.m[, 4],
                  locations = loci,
                  cov.pars = c(3, 1), nugget = 0,
                  cov.model = "matern",
                  kappa = 0.5, m0 = "kt", trend = 2)
coal.uk$predict
# [1] 10.77988 11.24153
coal.uk$krige.var
# [1] 3.023352 1.349461
coal.uk$beta
# coefficients:
# [1,]  1.124853e+01   (1)
# [2,] -2.090557e-01   (x)
# [3,] -1.381999e-02   (y)
# [4,] -2.817476e-03   (x^2)
# [5,] -9.021021e-04   (y^2)
# [6,]  6.312104e-03   (x*y)
### kriging performed in moving neighbourhood:
# Same ordinary-kriging set-up as the global fit, restricted by nwin = 5.
coal.wind <- ksline(coords = coal.m[, 2:3], data = coal.m[, 4],
                    locations = loci,
                    cov.pars = c(3, 1), nugget = 0,
                    cov.model = "matern",
                    kappa = 0.5, m0 = "ok", nwin = 5)
#nwin number of closest neighbors
coal.wind
# $predict:
# [1] 10.43828 10.84857
# $krige.var:
# [1] 3.136432 1.357889
###########################################
# linear trend not subregions:
# when trend is one implies a linear trend in two dimensions
# therefore we have 3 parameters to estimate
# nwin = "full" keeps the global neighbourhood (all observations).
coal.wind1 <- ksline(coords = coal.m[, 2:3], data = coal.m[, 4],
                     locations = loci,
                     cov.pars = c(3, 1), nugget = 0,
                     cov.model = "matern",
                     kappa = 0.5, m0 = "kt", trend = 1, nwin = "full")
coal.wind1
coal.wind1$predict
# [1] 10.62524 11.23732
coal.wind1$krige.var
# [1] 2.774784 1.349341
coal.wind1$beta
#             [,1]
# [1,] 10.86216801
# [2,] -0.16286289
# [3,]  0.01077067
### kriging performed in moving neighborhood with linear trend
# Linear trend (trend = 1) combined with an nwin = 5 moving window.
coal.wind2 <- ksline(coords = coal.m[, 2:3], data = coal.m[, 4],
                     locations = loci,
                     cov.pars = c(3, 1), nugget = 0,
                     cov.model = "matern",
                     kappa = 0.5, m0 = "kt", trend = 1, nwin = 5)
coal.wind2
coal.wind2$predict
# [1]  9.669875 10.876154
coal.wind2$krige.var
# [1] 8.350462 1.362241
# The first two rows of beta, transposed for display.
t(coal.wind2$beta[1:2, ])
#          [,1]      [,2]      [,3]
# [1,] 7.467317 0.4058152 0.3430783
# [2,] 7.030745 0.4716316 0.3130314
######################
## EXERCISE 3: ANISOTROPY.
#The function coords.aniso transforms your coordinates
# into the coordinates in the new space where we have isotropy.
# In aniso.pars the first value is the rotation angle (in radians) (psiA)
# and the second is psiR, where psiR is
# the stretching parameter
# and it is greater than 1; if psiR is 1 there is no
# stretching (these parameters can be estimated with likfit)
#
#
# Coordinates mapped with aniso.pars = c(.2, 3).
new.coord <- coords.aniso(
  coords = coal.m[, 2:3],
  aniso.pars = c(.2, 3)
)
# Kriging for the anisotropic case: same aniso.pars handed straight to
# ksline; the result is stored in coal.wind2 again.
coal.wind2 <- ksline(
  coords = coal.m[, 2:3],
  data = coal.m[, 4],
  locations = loci,
  cov.model = "exponential",
  cov.pars = c(3, 1),
  nugget = 0,
  m0 = "kt",
  trend = 1,
  aniso.pars = c(.2, 3)
)
#################################
##EXERCISE 4: Maps and Images
#use library akima (for interp) and fields (for US and image.plot)
library(akima)
library(fields)
X11()
ozone.txt <- read.table("ozone.txt")
# Ranges of the first two columns, used as map limits below.
rx <- range(ozone.txt[, 1])
ry <- range(ozone.txt[, 2])
#to obtain map of us within the limits rx (long) and ry (lat):
par(mfrow = c(1, 1))
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = FALSE)
# Interpolate column 3 onto a regular grid over columns 1 and 2.
surf <- interp(ozone.txt[, 1], ozone.txt[, 2], ozone.txt[, 3])
#to create an image:
# (graphics.reset is an image.plot argument, not a base image() one, so it
# is not passed here)
image(surf$x, surf$y, surf$z, add = FALSE, col = topo.colors(12))
#to label each data location in the image with its column-3 value:
text(ozone.txt[, 1:2], labels = ozone.txt[, 3])
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = TRUE)
#
#
#to create a 3-d plot:
persp(surf$x, surf$y, surf$z)
# to get a contour:
contour(surf$x, surf$y, surf$z)
#to add a legend to the image:
image.plot(surf$x, surf$y, surf$z, add = FALSE, graphics.reset = TRUE,
           col = topo.colors(12))
#
#DESCRIPTION of image:
#   Creates an image, under some graphics devices, of shades
#   of gray or colors that represent a third dimension.
#
#USAGE:
#   image(x, y, z, zlim = range(z), add = F)
#
#REQUIRED ARGUMENTS:
#x: vector containing x coordinates of grid over which 'z' is
#   evaluated. The values should be in increasing order;
#   missing values are not accepted.
#y: vector of grid y coordinates. The values should be in
#   increasing order; missing values are not accepted.
#z: is a matrix, 'z[i,j]' is
#   evaluated at ('x[i]', 'y[j]'). The rows of 'z' are indexed by
#   'x', and the columns by 'y'. Missing values (NA's) are
#   allowed.
#####
#
# interp: Interpolates the value of the third variable onto an
# evenly spaced grid of the first two variables. default is 40x40
# grid
#to save the plot as a postscript file:
# Everything drawn between postscript() and dev.off() goes to example.ps.
postscript(file = "example.ps")
image.plot(surf$x, surf$y, surf$z, add = FALSE, graphics.reset = TRUE,
           col = topo.colors(12))
dev.off()
# ----------------------------------------------------------------------------
# Using surface plotting
x1 <- 5:10
x2 <- 1:6
# f[i, j] = x1[i] + x2[j]
f <- outer(x1, x2, "+")
f
par(mfrow = c(2, 2))
image(x1, x2, f)
persp(x1, x2, f)
contour(x1, x2, f)
# an important list for surface plots: image() also accepts a single list
# with components x, y and z
look <- list(x = x1, y = x2, z = f)
image(look)
# LOCATE POINTS IN A MAP
library(fields)
US()
# NOTE(review): the columns are accessed by name below (lon, lat, elev), so
# this assumes read.table picks up a header row -- confirm, or pass
# header = TRUE.
climate <- read.table("climate.txt")
points(climate$lon, climate$lat, pch = "x") # mark each location with "x"
text(climate$lon, climate$lat, climate$elev) # label by elevation
## xlim and ylim to indicate the limits of your usa graph
US(xlim = c(-82, -73), ylim = c(32.5, 41))
points(climate$lon, climate$lat, pch = "o") # mark each location with "o"
# label by city names but make the text 1/2 the default size.
# left justify the label
# NOTE(review): no text() call for the city names follows in these notes.
hold <- locator(1) # now click on a point in the plot with the left button
hold # gives you the coordinates
points(hold, pch = "X") # mark the clicked point
text(hold$x, hold$y, "Here is my favorite place", adj = 1)
Download