Fuentes’ LAB NOTES: DATASETS NEEDED: www4.stat.ncsu.edu/~fuentes/dukelab

advertisement

Fuentes’ LAB NOTES: www4.stat.ncsu.edu/~fuentes/dukelab

DATASETS NEEDED: coalash.dt (Coal Ash dataset) www4.stat.ncsu.edu/~fuentes/coalash.txt davis.txt (topographic heights) www4.stat.ncsu.edu/~fuentes/davis.txt ozone.txt (ozone values) www4.stat.ncsu.edu/~fuentes/ozone.txt

Name the 3 files coalash.txt, davis.txt and ozone.txt in your directory. Software: R with the libraries geoR, fields and akima.

These packages are not compatible with each other

Use command detach(package:fields) to detach packages.

This lab has 2 parts:

Part I: spatial estimation, and

Part II: Bayesian spatial estimation and prediction

PART I

Estimating the spatial structure

##EXERCISE 1: How to plot the data and get empirical semivariograms

##EXERCISE 2: WNLS VARIOGRAM ESTIMATOR

##EXERCISE 3: REML

##EXERCISE 4: Profile likelihood

########################################################################

#Using geoR library

# download your data using read.table

# to learn more about read.table type

# > ? read.table

# Read the coal ash and Davis data files from the working directory and
# convert them to matrices. Later calls take coordinates from columns 2:3 and
# values from column 4 of coal.m, and coordinates from columns 1:2 and values
# from column 3 of davis.m.
coal.ash <- read.table("coalash.txt")
coal.m <- as.matrix(coal.ash)
davis.txt <- read.table("davis.txt")
davis.m <- as.matrix(davis.txt)[, 1:3]

##EXERCISE 1: How to plot the data and get binned semivariograms

#TO PLOT THE DATA

# Open a new graphics window, then draw the exploratory geodata plots for the
# coal ash data (coordinates in columns 2:3, values in column 4).
X11()
plot.geodata(coords = coal.m[, 2:3], data = coal.m[, 4])

# The plots returned are:

## - A plot with data locations. Symbols, their sizes

## (and colors) separate data from different quartiles as follows

### (circles) : 1st quartile

### (triangles) : 2nd

### (plus) : 3rd

### (crosses) : 4th

## - A plot with data-values against coordinate X

## - A plot with data-values against coordinate Y

#TO PRODUCE A VARIOGRAM CLOUD
# cloud1 uses the default estimator, cloud2 the "modulus" estimator.
cloud1 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  option = "cloud"
)
cloud2 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  option = "cloud", estimator.type = "modulus"
)
par(mfrow = c(1, 2))
plot(cloud1)
plot(cloud2)

## uvec : n-element vector of values to define the binning;
## the values of uvec define the centers of the bins
# bin.cloud = TRUE keeps the cloud values of each bin so that
# plot(bin1, bin.cloud = TRUE) below can display them.
bin1 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  bin.cloud = TRUE, uvec = seq(0, 10, length.out = 11)
)
bin2 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  bin.cloud = TRUE, estimator.type = "modulus",
  uvec = seq(0, 10, length.out = 11)
)
par(mfrow = c(1, 3))
plot(bin1)
plot(bin1, bin.cloud = TRUE)
plot(bin2)

#with Davis data:
# Binned semivariogram of the Davis heights (coordinates in columns 1:2,
# values in column 3), with 11 bin centers between 0 and 4.
bin.davis <- variog(
  coords = davis.m[, 1:2], data = davis.m[, 3],
  bin.cloud = TRUE, uvec = seq(0, 4, length.out = 11)
)
plot(bin.davis)

#############################################################################

##EXERCISE 2: WNLS ESTIMATOR
# Empirical variogram with default binning, then a variogram-model fit with an
# exponential covariance. Initial values are partial sill 0.5 and range 3;
# the nugget is estimated (fix.nugget = FALSE), starting from 0.
bin3 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4])
wls <- variofit(
  bin3,
  ini.cov.pars = c(.5, 3),
  fix.nugget = FALSE, nugget = 0,
  cov.model = "exponential"
)
summary(wls)

#output of wls

# THE WEIGHTS ARE OBTAINED FROM AN ITERATIVE, NONLINEAR ESTIMATION

# ROUTINE, using 'nlminb',

# nlminb is based on the Fortran functions dmnfb, dmngb, and

# dmnhb (Gay (1983; 1984), A T & T (1984)) from NETLIB

# (Dongarra and Grosse (1987)).

#

# ini: initial values of the parameters: partial sill and range

#

# covariance model, you can choose: "exponential", "matern", "gaussian",

# "spherical", "wave", "powered.exponential"

#

# if you choose the matern then the smoothing parameter is kappa

# here kappa is .5 (the default)

#

# You can say fix.nugget=T then the nugget will be 0 and

# you do not need to initialize the nugget, only the partial sill

# and range, therefore ini=c(.5,.3) where .5 is the initial value

# for partial sill and .3 is the initial value for range.

> wls$nugget:

[1] 1.013864

$cov.pars:

[1] 1.193859 10.445359

########################################################################

##EXERCISE 3: REML

#TO OBTAIN THE REML AND ML ESTIMATORS

#use likfit

#ini to initialize parameters

# partial sill and range

# kappa is power for powered exponential model

# kappa is smoothing for Matern

#method is ML or REML

#trend is cte, but you can also choose a linear

# trend by saying trend=1 or quadratic trend=2

# ML fit with a powered exponential covariance (kappa = 0.5), constant trend
# and the nugget held fixed.
# NOTE(review): current geoR documentation names the likelihood-method
# argument `lik.method`; confirm that `method` is still honoured by the
# installed version before relying on ML vs REML here.
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini = c(.5, .5), kappa = .5,
  trend = "cte", method = "ML", cov.model = "powered.exponential"
)

# Same model, but the anisotropy angle (psiA) and ratio (psiR) are estimated
# as well, starting from psiA = 0 and psiR = 1.
coalani.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini = c(.5, .5), kappa = .5,
  fix.psiA = FALSE, psiA = 0, fix.psiR = FALSE, psiR = 1,
  trend = "cte", method = "ML", cov.model = "powered.exponential"
)
summary(coal.ml)

#output: covariance model: powered.exponential with kappa = 0.5
# nugget sill range
# 0 1.564391 0.6769796

# REML covariance model: exponential
# nugget sill range
# 0 1.599048 0.8074612

# Refit with an exponential covariance (this overwrites coal.ml).
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini = c(.5, .5),
  trend = "cte", method = "ML", cov.model = "exponential"
)

# ML: covariance model: exponential
# nugget sill range
# 0 1.576983 0.7874607

# REML covariance model: gaussian
# nugget sill range
# 0 1.567491 0.7852996

summary(coalani.ml)

# Parameters of the mean component (trend):
# beta
# 9.7224
# Parameters of the spatial component:
# correlation function: powered.exponential
# (estimated) variance parameter sigmasq (partial sill) = 1.504
# (estimated) cor. fct. parameter phi (range parameter) = 0.5439
# (fixed) extra parameter kappa = 0.5
# anisotropy parameters:
# (estimated) anisotropy angle = 0.6617 ( 38 degrees )
# (estimated) anisotropy ratio = 2.020

############################################################################

## EXERCISE 4: PROFILE LIKELIHOOD

# profile log-likelihood for sill and range, here nugget is fixed

#first we fit the model whose likelihood will be profiled:
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini = c(.5, .5),
  trend = "cte", method = "ML", cov.model = "exponential"
)

#now we get the profile likelihood over 5 sill values between 0.1 and 5:
coal.prof <- proflik(
  coal.ml,
  coords = coal.m[, 2:3], data = coal.m[, 4],
  sill.values = seq(0.1, 5, length.out = 5)
)

#this takes a couple of minutes

# to plot the profile (the plot() generic dispatches to the proflik method)
plot(coal.prof)

###################################

+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

PART II

Bayesian Spatial estimation and prediction:

##EXERCISE 1: Bayesian Estimation of covariance parameters

##EXERCISE 2: Moving neighborhood kriging

##EXERCISE 3: ANISOTROPY

##EXERCISE 4: Maps and Images

########################################################################

# download your data using read.table

# to learn more about read.table type

# > ? read.table

# Read the coal ash and Davis data files from the working directory and
# convert them to matrices (here davis.m keeps every column of the file).
coal.ash <- read.table("coalash.txt")
coal.m <- as.matrix(coal.ash)
davis.txt <- read.table("davis.txt")
davis.m <- as.matrix(davis.txt)

###################################

##EXERCISE 1: Estimating model parameters

###A. Bayesian estimates of model parameters:

#Function: krige.bayes

# You obtain posteriors for the trend, range, partial sill and nugget

# Recommended (default) priors:

# uniform for beta, 1/sigma^2 for the partial sill

# and discrete uniform for the range and nugget.

#

# aniso.pars are the parameters for anisotropy: stretching and rotation angle.

#

# the default is with nugget 0, but you

# can also get a posterior for the nugget when you give a prior to the nugget,

# by saying nugget.prior="uniform"

#

#Additional information:

#----------------------

# This function computes (Bayesian) estimates of a spatial linear

#model and performs Bayesian and/or kriging prediction in a set of

#locations specified by the user.

# Priors options for mean and/or covariance parameters

# coords : data coordinates (vector for 1d or matrix for 2d data)

# data : vector with data values

# locations : coordinates of points to be estimated (vector for 1d or

# matrix for 2d data). If locations='no' only model parameters estimates are

#returned if the full bayesian model is considered.

# trend.d : trend data in data locations. Default is constant trend.

#The options '1st' or '2nd' build a first or second degree polynomial.

# trend.l : trend data in locations to be estimated. Default

#is constant trend. The options '1st' or '2nd' build a first or second

#degree polynomial.

#

#Example:

#nugget is fixed to zero or anyother value (here 0):

#

# see the histograms of the posteriors

# for the mean, the partial sill and the range:

# NOTE(review): coal.by1 is not created anywhere in these notes; the
# krige.bayes() call that produced it appears to be missing. It presumably
# resembled the other krige.bayes() calls in this lab, e.g.
#   coal.by1 <- krige.bayes(coords = coal.m[, 2:3], data = coal.m[, 4],
#                           prior = prior.control(phi.discrete = ...))
# -- TODO confirm the original call and its prior before running this section.

# Histograms of the posterior samples of the mean (beta), the partial sill
# (sigmasq) and the range (phi), side by side in a new graphics window.
X11()
par(mfrow = c(1, 3))
hist(coal.by1$posterior$sample$beta)
hist(coal.by1$posterior$sample$sigmasq)
hist(coal.by1$posterior$sample$phi)

# to see your function type:
coal.by1$call

#posterior for MEAN
coal.by1$posterior$beta$summary
# mean median mode.cond
# 9.740055 9.740496 9.758056

#posterior for partial sill
coal.by1$posterior$sigmasq
# mean median mode.cond
# 9.740055 9.740496 9.758056
# NOTE(review): these numbers are identical to the beta summary above, which
# looks like a copy-paste slip; re-run to obtain the sigmasq values.

#posterior for range
coal.by1$posterior$phi
# mean median mode
# 0.8645331 0.7894737 0.7894737

###B. Bayesian prediction:

#USE ksline for prediction, this function is still very slow,

#though it is being updated.

#locations where we want Bayesian prediction:
# two points, one per row: (2, 3.5) and (4, 5.5)
loci <- matrix(c(2, 3.5, 4, 5.5), ncol = 2, byrow = TRUE)
loci
#      [,1] [,2]
# [1,]    2  3.5
# [2,]    4  5.5

# this is the same as before for krige.bayes, but now we
# specify where you want to predict, by using the locations argument:
davis.bpred <- krige.bayes(
  coords = davis.m[, 1:2], data = davis.m[, 3],
  locations = loci,
  prior = prior.control(phi.discrete = seq(0, 3, length.out = 21))
)
davis.bpred$predictive$simulations
davis.bpred$predictive$mean
davis.bpred$predictive$variance
# 803.0342 735.7671
# 369.3989 461.2960

#Bayesian predictive distributions:
# one histogram per prediction location (rows of the simulations matrix)
par(mfrow = c(2, 1))
hist(davis.bpred$predictive$simulations[1, ])
hist(davis.bpred$predictive$simulations[2, ])

#

#

# If you want a nugget prior, instead of nugget=0,

#say nugget= c(1,2,5,6)

# or any other vector with the values of the discrete uniform prior

# of nugget/partial sill. Example:
# Discrete uniform prior on the relative nugget with 6 support points between
# 0 and 0.5, and a discrete prior on phi with 25 points between 0 and 3.
coal.b2 <- krige.bayes(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  prior = prior.control(
    tausq.rel.prior = "uniform",
    tausq.rel.discrete = seq(0, 0.5, length.out = 6),
    phi.discrete = seq(0, 3, length.out = 25)
  )
)

# Histograms of the posterior samples of beta, sigmasq, phi and the relative
# nugget, side by side in a new graphics window.
X11()
par(mfrow = c(1, 4))
hist(coal.b2$posterior$sample$beta)
hist(coal.b2$posterior$sample$sigmasq)
hist(coal.b2$posterior$sample$phi)
hist(coal.b2$posterior$sample$tausq.rel)

#Notice that here "nugget" means relative nugget (nugget divided by the

# sill). So the argument is defining a discrete prior for the relative

# nugget with support points at 0, 10%, 20%, ... 50%

############################################################

##EXERCISE 2: Moving neighborhood kriging

###"Kriging performed in global neighborhood":

# cov.pars : covariance parameters vector (partial sill,range)

# m0 : defines the type of kriging:

# 'sk': simple kriging (no trend)

# 'ok': ordinary kriging (constant trend)

# 'kt': kriging with a trend model(universal)

# kappa : kappa (smoothing) is the smoothing

# parameter for Matern or powered exponential covariance function

# Ordinary kriging (m0 = "ok") at the locations in loci, using a Matern
# covariance with kappa = 0.5, partial sill 3, range 1 and no nugget.
coal.k <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "ok"
)

##coal.k
coal.k$predict
# [1] 10.07305 11.22320

coal.k$krige.var
# [1] 2.684456 1.349229

coal.k$beta
#          [,1]
# [1,] 9.752618

coal.k$message
# [1] "Kriging performed in global neighborhood"

##Universal Kriging:

#

# the value of trend here is 2 which means we have a polynomial

# of degree 2 for a two dimensional problem, therefore

# we need to estimate 6 parameters (intercept, x, y, x^2, y^2, x*y),
# matching the six coefficients listed in the output below

# Kriging with a trend model (m0 = "kt") using a degree-2 polynomial trend.
coal.uk <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "kt", trend = 2
)
coal.uk$predict
# [1] 10.77988 11.24153

coal.uk$krige.var
# [1] 3.023352 1.349461

coal.uk$beta
# coefficients:
# [1,]  1.124853e+01 (1)
# [2,] -2.090557e-01 (x)
# [3,] -1.381999e-02 (y)
# [4,] -2.817476e-03 (x^2)
# [5,] -9.021021e-04 (y^2)
# [6,]  6.312104e-03 (x*y)

### kriging performed in moving neighbourhood:

# Ordinary kriging restricted to the 5 closest neighbours of each prediction
# location (nwin = 5).
coal.wind <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "ok", nwin = 5
)

#nwin number of closest neighbors
coal.wind
# $predict:
# [1] 10.43828 10.84857
# $krige.var:
# [1] 3.136432 1.357889

###########################################3

# linear trend not subregions:

# when trend is one implies a linear trend in two dimensions

# therefore we have 3 parameters to estimate

# Kriging with a linear trend (m0 = "kt", trend = 1) using all the data
# (nwin = "full").
coal.wind1 <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "kt", trend = 1, nwin = "full"
)

coal.wind1
coal.wind1$predict
# [1] 10.62524 11.23732

coal.wind1$krige.var
# [1] 2.774784 1.349341

coal.wind1$beta
#             [,1]
# [1,] 10.86216801
# [2,] -0.16286289
# [3,]  0.01077067

### kriging performed in moving neighborhood with linear trend
# Same linear-trend kriging, but using only the 5 closest neighbours of each
# prediction location (nwin = 5).
coal.wind2 <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0,
  cov.model = "matern", kappa = .5, m0 = "kt", trend = 1, nwin = 5
)
coal.wind2
coal.wind2$predict
# [1] 9.669875 10.876154

coal.wind2$krige.var
# [1] 8.350462 1.362241

t(coal.wind2$beta[1:2, ])
#          [,1]      [,2]      [,3]
# [1,] 7.467317 0.4058152 0.3430783
# [2,] 7.030745 0.4716316 0.3130314

######################

## EXERCISE 3: ANISOTROPY.

#The function coords.aniso transform your coordinates

# into the coordinates in the new space where we have isotropy

# first one is the rotation angle (in radians) (psiA) and the second is psiR

# where psiR is

# the stretching parameter

# and it is greater than 1; if psiR is 1 there is no

# stretching (these parameters can be estimated with likfit)

#

# new.coord<-coords.aniso(coal.m[,2:3],aniso.pars=c(.2,3))

# do kriging for anisotropic case

# Linear-trend kriging with an exponential covariance under geometric
# anisotropy: aniso.pars = c(angle in radians, stretching ratio).
# The result is stored in coal.wind2.
coal.wind2 <- ksline(
  coords = coal.m[, 2:3],
  data = coal.m[, 4],
  locations = loci,
  cov.pars = c(3, 1),
  nugget = 0,
  cov.model = "exponential",
  m0 = "kt",
  trend = 1,
  aniso.pars = c(.2, 3)
)

#################################

##EXERCISE 4: Maps and Images

#use library akima and fields

X11()

# Columns 1 and 2 of the ozone file are used as longitude and latitude,
# column 3 as the ozone value.
ozone.txt <- read.table("ozone.txt")

rx <- range(ozone.txt[, 1])
ry <- range(ozone.txt[, 2])

#to obtain map of us within the limits rx (long) and ry (lat):
par(mfrow = c(1, 1))
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = FALSE)

# interpolate the scattered values onto a regular grid
surf <- interp(ozone.txt[, 1], ozone.txt[, 2], ozone.txt[, 3])

#to create an image:
image(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)

#to add points in the image:
text(ozone.txt[, 1:2], labels = ozone.txt[, 3])

# overlay the US outline on the existing image (add = TRUE)
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = TRUE)

#to create a 3-d plot:
persp(surf$x, surf$y, surf$z)

# to get a contour:
contour(surf$x, surf$y, surf$z)

#to add a legend to the image:
image.plot(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)

#

#DESCRIPTION of image:

# Creates an image, under some graphics devices, of shades

# of gray or colors that represent a third dimension.

#

#USAGE:

# image(x, y, z, zlim = range(z), add = F)

#

#REQUIRED ARGUMENTS:

#x: vector containing x coordinates of grid over which z' is

# evaluated. The values should be in increasing order;

# missing values are not accepted.

#y: vector of grid y coordinates. The values should be in

# increasing order; missing values are not accepted.

#z: is a matrix, z[i,j]' is

# evaluated at x[i]', y[j]'). The rows of z' are indexed by

# x', and the columns by y'. Missing values (NA's) are

# allowed.

#####

#

# interp: Interpolates the value of the third variable onto an

# evenly spaced grid of the first two variables. default is 40x40

# grid

#to save the plot as a postscript file:
# open the postscript device, draw the image with its legend, then close the
# device so that example.ps is written out
postscript(file = "example.ps")
image.plot(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)
dev.off()

Download