Fuentes’ LAB NOTES: www4.stat.ncsu.edu/~fuentes/dukelab
DATASETS NEEDED: coalash.dt (Coal Ash dataset) www4.stat.ncsu.edu/~fuentes/coalash.txt davis.txt (topographic heights) www4.stat.ncsu.edu/~fuentes/davis.txt ozone.txt (ozone values) www4.stat.ncsu.edu/~fuentes/ozone.txt
Name the 3 files coalash.txt, davis.txt and ozone.txt in your directory. Software: R with the libraries geoR, fields and akima.
These packages are not compatible with each other
Use command detach(package:fields) to detach packages.
This lab has 2 parts:
Part I: spatial estimation, and
Part II: Bayesian spatial estimation and prediction
PART I
Estimating the spatial structure
##EXERCISE 1: How to plot the data and get empirical semivariograms
##EXERCISE 2: WNLS VARIOGRAM ESTIMATOR
##EXERCISE 3: REML
##EXERCISE 4: Profile likelihood
########################################################################
#Using geoR library
library(geoR)
# Read your data in using read.table
# (the files are looked up in the current working directory).
# To learn more about read.table type
# > ? read.table
coal.ash <- read.table("coalash.txt")
# The exercises below use columns 2:3 of coal.m as coordinates
# and column 4 as the data values.
coal.m <- as.matrix(coal.ash)
davis.txt <- read.table("davis.txt")
# Keep the first three columns only; the exercises below use columns 1:2
# as coordinates and column 3 as the data values.
davis.m <- as.matrix(davis.txt)[, 1:3]
##EXERCISE 1: How to plot the data and get binned semivariograms
#TO PLOT THE DATA
X11()
plot.geodata(coords = coal.m[, 2:3], data = coal.m[, 4])
# The plots returned are:
## - A plot with data locations. Symbols, their sizes
##   (and colors) separate data from different quartiles as follows
### (circles) : 1st quartile
### (triangles) : 2nd
### (plus) : 3rd
### (crosses) : 4th
## - A plot with data-values against coordinate X
## - A plot with data-values against coordinate Y

#TO PRODUCE A VARIOGRAM CLOUD
# cloud1 uses the default estimator, cloud2 the "modulus" estimator.
cloud1 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  option = "cloud"
)
cloud2 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  option = "cloud", estimator.type = "modulus"
)
par(mfrow = c(1, 2))
plot(cloud1)
plot(cloud2)

#TO PRODUCE BINNED VARIOGRAMS
## uvec : n-element vector of values to define the binning;
## the values of uvec define the centers of the bins
bin1 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  bin.cloud = TRUE, uvec = seq(0, 10, length.out = 11)
)
bin2 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  bin.cloud = TRUE, estimator.type = "modulus",
  uvec = seq(0, 10, length.out = 11)
)
par(mfrow = c(1, 3))
plot(bin1)
plot(bin1, bin.cloud = TRUE)
plot(bin2)

#with Davis data:
bin.davis <- variog(
  coords = davis.m[, 1:2], data = davis.m[, 3],
  bin.cloud = TRUE, uvec = seq(0, 4, length.out = 11)
)
plot(bin.davis)
#############################################################################
##EXERCISE 2: WNLS ESTIMATOR
# Fit an exponential variogram model to the binned empirical variogram;
# the nugget is estimated (fix.nugget = FALSE) starting from 0.
bin3 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4])
wls <- variofit(
  bin3,
  ini.cov.pars = c(.5, 3), fix.nugget = FALSE, nugget = 0,
  cov.model = "exponential"
)
summary(wls)
#output of wls
# THE WEIGHTS ARE OBTAINED FROM AN ITERATIVE, NONLINEAR ESTIMATION
# ROUTINE, using 'nlminb',
# nlminb is based on the Fortran functions dmnfb, dmngb, and
# dmnhb (Gay (1983; 1984), A T & T (1984)) from NETLIB
# (Dongarra and Grosse (1987)).
#
# ini: initial values of the parameters: partial sill and range
#
# covariance model, you can choose: "exponential", "matern", "gaussian",
# "spherical", "wave", "powered.exponential"
#
# if you choose the matern then the smoothing parameter is kappa
# here kappa is .5 (the default)
#
# You can say fix.nugget=TRUE then the nugget will be 0 and
# you do not need to initialize the nugget, only the partial sill
# and range, therefore ini=c(.5,.3) where .5 is the initial value
# for partial sill and .3 is the initial value for range.
#
# Printed result:
# > wls$nugget:
# [1] 1.013864
# $cov.pars:
# [1] 1.193859 10.445359
########################################################################
##EXERCISE 3: REML
#TO OBTAIN THE REML AND ML ESTIMATORS
#use likfit
#ini to initialize parameters
# partial sill and range
# kappa is power for powered exponential model
# kappa is smoothing for Matern
#method is ML or REML
#trend is cte, but you can also choose a linear
# trend by saying trend=1 or quadratic trend=2
#
# NOTE(review): the calls below keep the `method` argument exactly as written
# in the notes; the geoR likfit documentation names this argument
# `lik.method` -- confirm against the installed geoR version.

# Isotropic fit, nugget fixed.
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5), kappa = .5,
  trend = "cte", method = "ML", cov.model = "powered.exponential"
)
# Same model, but the anisotropy angle (psiA) and ratio (psiR) are estimated
# as well, starting from psiA = 0 and psiR = 1.
coalani.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5), kappa = .5,
  fix.psiA = FALSE, psiA = 0, fix.psiR = FALSE, psiR = 1,
  trend = "cte", method = "ML", cov.model = "powered.exponential"
)
summary(coal.ml)
#output: covariance model: powered.exponential with kappa = 0.5
# nugget sill range
# 0 1.564391 0.6769796
# REML covariance model: exponential
# nugget sill range
# 0 1.599048 0.8074612

# Refit with the exponential model (this overwrites coal.ml).
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5),
  trend = "cte", method = "ML", cov.model = "exponential"
)
# ML: covariance model: exponential
# nugget sill range
# 0 1.576983 0.7874607
# REML covariance model: gaussian
# nugget sill range
# 0 1.567491 0.7852996

summary(coalani.ml)
# Parameters of the mean component (trend):
# beta
# 9.7224
# Parameters of the spatial component:
# correlation function: powered.exponential
# (estimated) variance parameter sigmasq (partial sill) = 1.504
# (estimated) cor. fct. parameter phi (range parameter) = 0.5439
# (fixed) extra parameter kappa = 0.5
# anisotropy parameters:
# (estimated) anisotropy angle = 0.6617 ( 38 degrees )
# (estimated) anisotropy ratio = 2.020
############################################################################
##EXERCISE 4: PROFILE LIKELIHOOD
# profile log-likelihood for sill and range, here nugget is fixed
#first we fit the model by ML; the range of sill values over which the
# likelihood is profiled is then given to proflik (sill.values):
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5),
  trend = "cte", method = "ML", cov.model = "exponential"
)
#now we get the profile likelihood:
coal.prof <- proflik(
  coal.ml,
  coords = coal.m[, 2:3], data = coal.m[, 4],
  sill.values = seq(0.1, 5, length.out = 5)
)
#this takes a couple of minutes
# to plot the profile (plot() dispatches to the proflik method):
plot(coal.prof)
###################################
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PART II
Bayesian Spatial estimation and prediction:
##EXERCISE 1: Bayesian Estimation of covariance parameters
##EXERCISE 2: Moving neighborhood kriging
##EXERCISE 3: ANISOTROPY
##EXERCISE 4: Maps and Images
########################################################################
# Read your data in using read.table
# (the files are looked up in the current working directory).
# To learn more about read.table type
# > ? read.table
# geoR provides krige.bayes and ksline, which are used throughout Part II.
library(geoR)
coal.ash <- read.table("coalash.txt")
coal.m <- as.matrix(coal.ash)
davis.txt <- read.table("davis.txt")
davis.m <- as.matrix(davis.txt)
###################################
##EXERCISE 1: Estimating model parameters
###A. Bayesian estimates of model parameters:
#Function: krige.bayes
# You obtain posteriors for the trend, range, partial sill and nugget
# Recommended (default) priors:
# uniform for beta, 1/sigma^2 for the partial sill
# and discrete uniform for the range and nugget.
#
# aniso.pars are the parameters for anisotropy: stretching and rotation angle.
#
# the default is with nugget 0, but you
# can also get a posterior for the nugget when you give a prior to the nugget,
# by saying nugget.prior="uniform"
#
#Additional information:
#----------------------
# This function computes (Bayesian) estimates of a spatial linear
#model and performs Bayesian and/or kriging prediction in a set of
#locations specified by the user.
# Priors options for mean and/or covariance parameters
# coords : data coordinates (vector for 1d or matrix for 2d data)
# data : vector with data values
# locations : coordinates of points to be estimated (vector for 1d or
# matrix for 2d data). If locations='no' only model parameters estimates are
#returned if the full bayesian model is considered.
# trend.d : trend data in data locations. Default is constant trend.
#The options '1st' or '2nd' build a first or second degree polynomial.
# trend.l : trend data in locations to be estimated. Default
#is constant trend. The options '1st' or '2nd' build a first or second
#degree polynomial.
#
#Example:
#nugget is fixed to zero or any other value (here 0):
#
# NOTE(review): the notes do not show the call that creates coal.by1, yet
# every statement below uses it. The call here is reconstructed by analogy
# with the other krige.bayes calls in these notes. The 20-point grid for phi
# is an assumption: the posterior median/mode printed below for the range
# (0.7894737 = 15/19) is a point of seq(0, 3, length.out = 20). Confirm.
coal.by1 <- krige.bayes(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  prior = prior.control(phi.discrete = seq(0, 3, length.out = 20))
)
# see the histograms of the posteriors
# for the mean, the partial sill and the range:
X11()
par(mfrow = c(1, 3))
hist(coal.by1$posterior$sample$beta)
hist(coal.by1$posterior$sample$sigmasq)
hist(coal.by1$posterior$sample$phi)
# to see your function type:
coal.by1$call
#posterior for MEAN
coal.by1$posterior$beta$summary
# mean median mode.cond
# 9.740055 9.740496 9.758056
#posterior for partial sill
coal.by1$posterior$sigmasq
# NOTE(review): the numbers printed in the notes for the partial sill are
# identical to those shown for the mean above, which looks like a copy-paste
# slip -- re-run to obtain the actual summary.
# mean median mode.cond
# 9.740055 9.740496 9.758056
#posterior for range
coal.by1$posterior$phi
# mean median mode
# 0.8645331 0.7894737 0.7894737
###B. Bayesian prediction:
#USE ksline for prediction, this function is still very slow,
#though it is being updated.
# NOTE(review): the code in this section calls krige.bayes (with the
# locations argument), not ksline.
#locations where we want Bayesian prediction:
loci <- matrix(c(2, 3.5, 4, 5.5), ncol = 2, byrow = TRUE)
loci
#      [,1] [,2]
# [1,]    2  3.5
# [2,]    4  5.5
# this is the same as before for krige.bayes, but now we
# specify where you want to predict, by using the locations argument:
davis.bpred <- krige.bayes(
  coords = davis.m[, 1:2], data = davis.m[, 3],
  locations = loci,
  prior = prior.control(phi.discrete = seq(0, 3, length.out = 21))
)
davis.bpred$predictive$simulations
davis.bpred$predictive$mean
davis.bpred$predictive$variance
# 803.0342 735.7671
# 369.3989 461.2960
#Bayesian predictive distributions (one histogram per prediction location):
par(mfrow = c(2, 1))
hist(davis.bpred$predictive$simulations[1, ])
hist(davis.bpred$predictive$simulations[2, ])
#
#
# If you want a nugget prior, instead of nugget=0,
#say nugget= c(1,2,5,6)
# or any other vector with the values of the discrete uniform prior
# of nugget/partial sill. Example:
coal.b2 <- krige.bayes(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  prior = prior.control(
    tausq.rel.prior = "uniform",
    tausq.rel.discrete = seq(0, 0.5, length.out = 6),
    phi.discrete = seq(0, 3, length.out = 25)
  )
)
X11()
par(mfrow = c(1, 4))
hist(coal.b2$posterior$sample$beta)
hist(coal.b2$posterior$sample$sigmasq)
hist(coal.b2$posterior$sample$phi)
hist(coal.b2$posterior$sample$tausq.rel)
#Notice that here "nugget" means relative nugget (nugget divided by the
# sill). So the argument is defining a discrete prior for the relative
# nugget with support points at 0, 10%, 20%, ... 50%
############################################################
##EXERCISE 2: Moving neighborhood kriging
###"Kriging performed in global neighborhood":
# cov.pars : covariance parameters vector (partial sill,range)
# m0 : defines the type of kriging:
# 'sk': simple kriging (no trend)
# 'ok': ordinary kriging (constant trend)
# 'kt': kriging with a trend model(universal)
# kappa : kappa (smoothing) is the smoothing
# parameter for Matern or powered exponential covariance function
#
# loci is the matrix of prediction locations built in the Bayesian
# prediction part of Exercise 1.
coal.k <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "ok"
)
##coal.k
coal.k$predict
# [1] 10.07305 11.22320
coal.k$krige.var
# [1] 2.684456 1.349229
coal.k$beta
# [,1]
# [1,] 9.752618
coal.k$message
# [1] "Kriging performed in global neighborhood"

##Universal Kriging:
#
# the value of trend here is 2 which means we have a polynomial
# of degree 2 for a two dimensional problem, therefore
# we need to estimate 6 parameters (1, x, y, x^2, y^2, x*y; see the
# coal.uk$beta output below)
coal.uk <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "kt", trend = 2
)
coal.uk$predict
# [1] 10.77988 11.24153
coal.uk$krige.var
# [1] 3.023352 1.349461
coal.uk$beta
# coefficients:
# [1,] 1.124853e+01 (1)
# [2,] -2.090557e-01 (x)
# [3,] -1.381999e-02 (y)
# [4,] -2.817476e-03 (x^2)
# [5,] -9.021021e-04 (y^2)
# [6,] 6.312104e-03 (x*y)

### kriging performed in moving neighbourhood:
#nwin number of closest neighbors
coal.wind <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "ok", nwin = 5
)
coal.wind
# $predict:
# [1] 10.43828 10.84857
# $krige.var:
# [1] 3.136432 1.357889
###########################################3
# linear trend not subregions:
# when trend is one implies a linear trend in two dimensions
# therefore we have 3 parameters to estimate
coal.wind1 <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "kt", trend = 1, nwin = "full"
)
coal.wind1
coal.wind1$predict
# [1] 10.62524 11.23732
coal.wind1$krige.var
# [1] 2.774784 1.349341
coal.wind1$beta
# [,1]
# [1,] 10.86216801
# [2,] -0.16286289
# [3,] 0.01077067

### kriging performed in moving neighborhood with linear trend
coal.wind2 <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "kt", trend = 1, nwin = 5
)
coal.wind2
coal.wind2$predict
# [1] 9.669875 10.876154
coal.wind2$krige.var
# [1] 8.350462 1.362241
t(coal.wind2$beta[1:2, ])
# [,1] [,2] [,3]
# [1,] 7.467317 0.4058152 0.3430783
# [2,] 7.030745 0.4716316 0.3130314
######################
## EXERCISE 3: ANISOTROPY.
# The function coords.aniso transforms your coordinates
# into the coordinates of the new space where we have isotropy.
# aniso.pars holds two values: the first one is the rotation angle
# (in radians) (psiA) and the second is psiR,
# where psiR is the stretching parameter.
# psiR is greater than 1; if psiR is 1 there is no
# stretching (these parameters can be estimated with likfit)
#
# new.coord<-coords.aniso(coal.m[,2:3],aniso.pars=c(.2,3))
# Kriging for the anisotropic case: universal kriging with a linear trend,
# rotation angle .2 and stretching 3.
# (this assignment reuses the name coal.wind2 from Exercise 2)
coal.wind2 <- ksline(
  coords = coal.m[, 2:3],
  data = coal.m[, 4],
  locations = loci,
  cov.pars = c(3, 1),
  nugget = 0,
  cov.model = "exponential",
  m0 = "kt",
  trend = 1,
  aniso.pars = c(.2, 3)
)
#################################
##EXERCISE 4: Maps and Images
#use library akima and fields
# interp() comes from akima; US() and image.plot() come from fields.
library(akima)
library(fields)
X11()
ozone.txt <- read.table("ozone.txt")
# columns 1 and 2 are used as longitude / latitude, column 3 as the value
rx <- range(ozone.txt[, 1])
ry <- range(ozone.txt[, 2])
#to obtain map of us within the limits rx (long) and ry (lat):
par(mfrow = c(1, 1))
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = FALSE)
# interpolate the values onto a regular grid
surf <- interp(ozone.txt[, 1], ozone.txt[, 2], ozone.txt[, 3])
#to create an image:
# (graphics.reset is an argument of image.plot, not of base image, so it is
# not passed here)
image(surf$x, surf$y, surf$z, add = FALSE, col = topo.colors(12))
#to add points in the image:
text(ozone.txt[, 1:2], labels = ozone.txt[, 3])
# overlay the US map on top of the image
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = TRUE)
#
#
#to create a 3-d plot:
persp(surf$x, surf$y, surf$z)
# to get a contour:
contour(surf$x, surf$y, surf$z)
#to add a legend to the image:
image.plot(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)
#
#DESCRIPTION of image:
# Creates an image, under some graphics devices, of shades
# of gray or colors that represent a third dimension.
#
#USAGE:
# image(x, y, z, zlim = range(z), add = F)
#
#REQUIRED ARGUMENTS:
#x: vector containing x coordinates of grid over which z is
# evaluated. The values should be in increasing order;
# missing values are not accepted.
#y: vector of grid y coordinates. The values should be in
# increasing order; missing values are not accepted.
#z: is a matrix, z[i,j] is
# evaluated at (x[i], y[j]). The rows of z are indexed by
# x, and the columns by y. Missing values (NA's) are
# allowed.
#####
#
# interp: Interpolates the value of the third variable onto an
# evenly spaced grid of the first two variables. default is 40x40
# grid
#to save the plot as a postscript file:
# open the postscript device, draw the image with its legend, then close the
# device so the file is written out
postscript(file = "example.ps")
image.plot(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)
dev.off()