# Fuentes' LAB NOTES: www4.stat.ncsu.edu/~fuentes/dukelab
#
# DATASETS NEEDED:
#   coalash.dt  (Coal Ash dataset)    www4.stat.ncsu.edu/~fuentes/coalash.txt
#   davis.txt   (topographic heights) www4.stat.ncsu.edu/~fuentes/davis.txt
#   ozone.txt   (ozone values)        www4.stat.ncsu.edu/~fuentes/ozone.txt
#   climate.txt (climate data frame)  www4.stat.ncsu.edu/~fuentes/climate.txt
#
# Name the 3 files coalash.txt, davis.txt and ozone.txt in your directory.
#
# Software: R with the libraries geoR, fields and akima.
# These packages are not compatible with each other.
# Use the command detach(package:fields) to detach packages.
#
# This lab has 2 parts:
#   Part I:  spatial estimation, and
#   Part II: Bayesian spatial estimation and prediction
#
# PART I: Estimating the spatial structure
#
## EXERCISE 1: How to plot the data and get empirical semivariograms
## EXERCISE 2: WNLS VARIOGRAM ESTIMATOR
## EXERCISE 3: REML
## EXERCISE 4: Profile likelihood

########################################################################
# Using geoR library
library(geoR)

# download your data using read.table
# to learn more about read.table type
# > ? read.table
coal.ash <- read.table("coalash.txt")
coal.m <- as.matrix(coal.ash)
davis.txt <- read.table("davis.txt")
# keep only the first three columns of the Davis data
davis.m <- as.matrix(davis.txt)[, 1:3]

## EXERCISE 1: How to plot the data and get binned semivariograms

# TO PLOT THE DATA
# columns 2:3 of coal.m are passed as coordinates, column 4 as the data
dev.new()  # opens a new graphics device (portable replacement for X11())
plot.geodata(coords = coal.m[, 2:3], data = coal.m[, 4])
# The plots returned are:
## - A plot with data locations.
# Symbols, their sizes (and colors) separate data from different quartiles
# as follows:
### (circles)   : 1st quantile
### (triangles) : 2nd
### (plus)      : 3rd
### (crosses)   : 4th
## - A plot with data values against coordinate X
## - A plot with data values against coordinate Y

# TO PRODUCE A VARIOGRAM CLOUD
# cloud1 uses the default estimator, cloud2 the 'modulus' estimator
cloud1 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  option = "cloud"
)
cloud2 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  option = "cloud", estimator.type = "modulus"
)
par(mfrow = c(1, 2))
plot(cloud1)
plot(cloud2)

bin1 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  bin.cloud = TRUE, uvec = seq(0, 10, length.out = 11)
)
## uvec : n-element vector of values to define the binning;
##        the values of uvec define the centers of the bins
bin2 <- variog(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  bin.cloud = TRUE, estimator.type = "modulus",
  uvec = seq(0, 10, length.out = 11)
)
par(mfrow = c(1, 3))
plot(bin1)
plot(bin1, bin.cloud = TRUE)
plot(bin2)

# with Davis data:
bin.davis <- variog(
  coords = davis.m[, 1:2], data = davis.m[, 3],
  bin.cloud = TRUE, uvec = seq(0, 4, length.out = 11)
)
plot(bin.davis)

#############################################################################
## EXERCISE 2: WNLS ESTIMATOR
bin3 <- variog(coords = coal.m[, 2:3], data = coal.m[, 4])
# initial values: partial sill .5, range 3; the nugget is estimated
# (fix.nugget = FALSE) starting from 0
wls <- variofit(
  bin3,
  ini.cov.pars = c(.5, 3), fix.nugget = FALSE, nugget = 0,
  cov.model = "exponential"
)
summary(wls)  # output of wls
# THE WEIGHTS ARE OBTAINED FROM AN ITERATIVE, NONLINEAR ESTIMATION
# ROUTINE, using 'nlminb',
# nlminb is based on the Fortran functions dmnfb, dmngb, and
# dmnhb (Gay (1983; 1984), A T & T (1984)) from NETLIB
# (Dongarra and Grosse (1987)).
#
# ini: initial values of the parameters: partial sill and range
#
# covariance model, you can choose: "exponential", "matern", "gaussian",
# "spherical", "wave", "powered.exponential"
#
# if you choose the matern then the smoothing parameter is kappa
# here kappa is .5 (the default)
#
# You can say fix.nugget=T then the nugget will be 0 and
# you do not need to initialize the nugget, only the partial sill
# and range, therefore ini=c(.5,.3) where .5 is the initial value
# for partial sill and .3 is the initial value for range.
wls
# $nugget:
# [1] 1.013864
# $cov.pars:
# [1]  1.193859 10.445359

########################################################################
## EXERCISE 3: REML
# TO OBTAIN THE REML AND ML ESTIMATORS
# use likfit
# ini to initialize parameters
#   partial sill and range
# kappa is power for powered exponential model
# kappa is smoothing for Matern
# method is ML or REML
# trend is cte, but you can also choose a linear
#   trend by saying trend=1 or quadratic trend=2

# isotropic ML fit, nugget fixed
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5), kappa = .5,
  trend = "cte", method = "ML", cov.model = "powered.exponential"
)
# same model, but the anisotropy angle (psiA) and ratio (psiR) are
# estimated, starting from psiA = 0 and psiR = 1
coalani.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5), kappa = .5,
  fix.psiA = FALSE, psiA = 0, fix.psiR = FALSE, psiR = 1,
  trend = "cte", method = "ML", cov.model = "powered.exponential"
)
summary(coal.ml)
# output:
# covariance model: powered.exponential with kappa = 0.5
#   nugget     sill     range
#        0 1.564391 0.6769796
#
# REML
# covariance model: exponential
#   nugget     sill     range
#        0 1.599048 0.8074612

coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5),
  trend = "cte", method = "ML", cov.model = "exponential"
)
# ML:
# covariance model: exponential
#   nugget     sill     range
#        0 1.576983 0.7874607
#
# REML
# covariance model: gaussian
#   nugget     sill     range
#        0 1.567491 0.7852996

summary(coalani.ml)
# Parameters of the mean component (trend):
#   beta 9.7224
# Parameters of the spatial component:
#   correlation function: powered.exponential
#   (estimated) variance
#   parameter sigmasq (partial sill) = 1.504
#   (estimated) cor. fct. parameter phi (range parameter) = 0.5439
#   (fixed) extra parameter kappa = 0.5
#   anisotropy parameters:
#     (estimated) anisotropy angle = 0.6617 ( 38 degrees )
#     (estimated) anisotropy ratio = 2.020

############################################################################
## EXERCISE 4: PROFILE LIKELIHOOD
# profile log-likelihood for sill and range, here nugget is fixed
# first we give range of values to get the likelihood for sill -- range:
coal.ml <- likfit(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  fix.nugget = TRUE, ini.cov.pars = c(.5, .5),
  trend = "cte", method = "ML", cov.model = "exponential"
)
# now we get the profile likelihood:
coal.prof <- proflik(
  coal.ml,
  coords = coal.m[, 2:3], data = coal.m[, 4],
  sill.values = seq(0.1, 5, length.out = 5)
)
# this takes a couple of minutes
# to plot the profile (the plot generic dispatches on the proflik result)
plot(coal.prof)

###################################
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# PART II: Bayesian spatial estimation and prediction:
## EXERCISE 1: Bayesian Estimation of covariance parameters
## EXERCISE 2: Moving neighborhood kriging
## EXERCISE 3: ANISOTROPY
## EXERCISE 4: Maps and Images
########################################################################
# download your data using read.table
# to learn more about read.table type
# > ? read.table
coal.ash <- read.table("coalash.txt")
coal.m <- as.matrix(coal.ash)
davis.txt <- read.table("davis.txt")
davis.m <- as.matrix(davis.txt)

###################################
## EXERCISE 1: Estimating model parameters
### A. Bayesian estimates of model parameters:
# Function: krige.bayes
# You obtain posteriors for the trend, range, partial sill and nugget.
# Recommended (default) priors:
#   uniform for beta, 1/sigma^2 for the partial sill
#   and discrete uniform for the range and nugget.
# aniso.pars are the parameters for anisotropy: stretching and rotation angle.
#
# the default is with nugget 0, but you
# can also get a posterior for the nugget when you give a prior to the nugget,
# by saying nugget.prior="uniform"
#
# Additional information:
#---------------------
# This function computes (Bayesian) estimates of a spatial linear
# model and performs Bayesian and/or kriging prediction in a set of
# locations specified by the user.
# Prior options for mean and/or covariance parameters
#   coords    : data coordinates (vector for 1d or matrix for 2d data)
#   data      : vector with data values
#   locations : coordinates of points to be estimated (vector for 1d or
#               matrix for 2d data). If locations='no' only model parameter
#               estimates are returned if the full Bayesian model is
#               considered.
#   trend.d   : trend data in data locations. Default is constant trend.
#               The options '1st' or '2nd' build a first or second degree
#               polynomial.
#   trend.l   : trend data in locations to be estimated. Default
#               is constant trend. The options '1st' or '2nd' build a first
#               or second degree polynomial.
#
# Example:
# discrete prior for the range (phi) on 21 equally spaced points in [0, 3]
coal.by1 <- krige.bayes(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  prior = prior.control(phi.discrete = seq(0, 3, length.out = 21))
)
# nugget is fixed to zero or any other value (here 0):
#
# see the histograms of the posteriors
# for the mean, the partial sill and the range:
dev.new()  # opens a new graphics device (portable replacement for X11())
par(mfrow = c(1, 3))
hist(coal.by1$posterior$sample$beta)
hist(coal.by1$posterior$sample$sigmasq)
hist(coal.by1$posterior$sample$phi)

# to see your function type:
coal.by1$call

# posterior for MEAN
coal.by1$posterior$beta$summary
#     mean   median mode.cond
# 9.740055 9.740496  9.758056

# posterior for partial sill
coal.by1$posterior$sigmasq
#     mean   median mode.cond
# 9.740055 9.740496  9.758056
# NOTE(review): the values printed here are identical to the beta summary
# above; this looks like a copy-paste slip in the notes -- re-run to confirm.

# posterior for range
coal.by1$posterior$phi
#      mean    median      mode
# 0.8645331 0.7894737 0.7894737

### B. Bayesian prediction:
# USE ksline for prediction, this function is still very slow,
# though it is being updated.
# locations where we want Bayesian prediction:
loci <- matrix(c(2, 3.5, 4, 5.5), ncol = 2, byrow = TRUE)
loci
#      [,1] [,2]
# [1,]    2  3.5
# [2,]    4  5.5

# this is the same as before for krige.bayes, but now we
# specify where you want to predict, by using the locations argument:
davis.bpred <- krige.bayes(
  coords = davis.m[, 1:2], data = davis.m[, 3],
  locations = loci,
  prior = prior.control(phi.discrete = seq(0, 3, length.out = 21))
)
davis.bpred$predictive$simulations
davis.bpred$predictive$mean
davis.bpred$predictive$variance
# 803.0342 735.7671
# 369.3989 461.2960

# Bayesian predictive distributions (one histogram per prediction location):
par(mfrow = c(2, 1))
hist(davis.bpred$predictive$simulations[1, ])
hist(davis.bpred$predictive$simulations[2, ])

#
# If you want a nugget prior, instead of nugget=0,
# say nugget=c(1,2,5,6)
# or any other vector with the values of the discrete uniform prior
# of nugget/partial sill. Example:
coal.b2 <- krige.bayes(
  coords = coal.m[, 2:3], data = coal.m[, 4],
  prior = prior.control(
    tausq.rel.prior = "uniform",
    tausq.rel.discrete = seq(0, 0.5, length.out = 6),
    phi.discrete = seq(0, 3, length.out = 25)
  )
)
dev.new()  # opens a new graphics device (portable replacement for X11())
par(mfrow = c(1, 4))
hist(coal.b2$posterior$sample$beta)
hist(coal.b2$posterior$sample$sigmasq)
hist(coal.b2$posterior$sample$phi)
hist(coal.b2$posterior$sample$tausq.rel)
# Notice that here tausq.rel means relative nugget (nugget divided by the
# partial sill). So the argument is defining a discrete prior for the relative
# nugget with support points at 0, 10%, 20%, ...
# 50%
############################################################
## EXERCISE 2: Moving neighborhood kriging
### "Kriging performed in global neighborhood":
# cov.pars : covariance parameters vector (partial sill, range)
# m0       : defines the type of kriging:
#              'sk': simple kriging (no trend)
#              'ok': ordinary kriging (constant trend)
#              'kt': kriging with a trend model (universal)
# kappa    : the smoothing parameter for the Matern or powered
#            exponential covariance function
coal.k <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0, cov.model = "matern",
  kappa = .5, m0 = "ok"
)
## coal.k
coal.k$predict
# [1] 10.07305 11.22320
coal.k$krige.var
# [1] 2.684456 1.349229
coal.k$beta
#          [,1]
# [1,] 9.752618
coal.k$message
# [1] "Kriging performed in global neighborhood"

## Universal Kriging:
#
# the value of trend here is 2 which means we have a polynomial
# of degree 2 for a two dimensional problem, therefore
# we need to estimate 6 coefficients (intercept, x, y, x^2, y^2, x*y),
# as listed in the output below
coal.uk <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0, cov.model = "matern",
  kappa = .5, m0 = "kt", trend = 2
)
coal.uk$predict
# [1] 10.77988 11.24153
coal.uk$krige.var
# [1] 3.023352 1.349461
coal.uk$beta
# coefficients:
# [1,]  1.124853e+01  (1)
# [2,] -2.090557e-01  (x)
# [3,] -1.381999e-02  (y)
# [4,] -2.817476e-03  (x^2)
# [5,] -9.021021e-04  (y^2)
# [6,]  6.312104e-03  (x*y)

### kriging performed in moving neighbourhood:
coal.wind <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0, cov.model = "matern",
  kappa = .5, m0 = "ok", nwin = 5
)
# nwin: number of closest neighbors
coal.wind
# $predict:
# [1] 10.43828 10.84857
# $krige.var:
# [1] 3.136432 1.357889

############################################
# linear trend, no subregions:
# trend equal to one implies a linear trend in two dimensions,
# therefore we have 3 parameters to estimate
coal.wind1 <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0, cov.model = "matern",
  kappa = .5, m0 = "kt", trend = 1, nwin = "full"
)
coal.wind1
coal.wind1$predict
# [1] 10.62524 11.23732
coal.wind1$krige.var
# [1] 2.774784 1.349341
coal.wind1$beta
#             [,1]
# [1,] 10.86216801
# [2,] -0.16286289
# [3,]  0.01077067

### kriging performed in moving neighborhood with linear trend
coal.wind2 <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0, cov.model = "matern",
  kappa = .5, m0 = "kt", trend = 1, nwin = 5
)
coal.wind2
coal.wind2$predict
# [1]  9.669875 10.876154
coal.wind2$krige.var
# [1] 8.350462 1.362241
t(coal.wind2$beta[1:2, ])
#          [,1]      [,2]      [,3]
# [1,] 7.467317 0.4058152 0.3430783
# [2,] 7.030745 0.4716316 0.3130314

######################
## EXERCISE 3: ANISOTROPY.
# The function coords.aniso transforms your coordinates
# into the coordinates in the new space where we have isotropy.
# In aniso.pars the first one is the rotation angle (in radians) (psiA)
# and the second is psiR, where psiR is the stretching parameter
# and it is greater than 1; if psiR is 1 there is no
# stretching (these parameters can be estimated with likfit)
#
new.coord <- coords.aniso(coal.m[, 2:3], aniso.pars = c(.2, 3))

# do kriging for anisotropic case
# (this overwrites the coal.wind2 result from the moving-neighborhood fit)
coal.wind2 <- ksline(
  coords = coal.m[, 2:3], data = coal.m[, 4], locations = loci,
  cov.pars = c(3, 1), nugget = 0, cov.model = "exponential",
  m0 = "kt", trend = 1, aniso.pars = c(.2, 3)
)

#################################
## EXERCISE 4: Maps and Images
# use library akima and fields
library(akima)
library(fields)
dev.new()  # opens a new graphics device (portable replacement for X11())
ozone.txt <- read.table("ozone.txt")
rx <- range(ozone.txt[, 1])
ry <- range(ozone.txt[, 2])
# to obtain map of us within the limits rx (long) and ry (lat):
par(mfrow = c(1, 1))
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = FALSE)
# interpolate column 3 onto a grid over columns 1 and 2
surf <- interp(ozone.txt[, 1], ozone.txt[, 2], ozone.txt[, 3])
# to create an image:
# NOTE(review): graphics.reset looks like an image.plot() argument; base
# image() may warn about it -- confirm before removing.
image(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)
# to add points in the image:
text(ozone.txt[, 1:2], labels = ozone.txt[, 3])
US(xlim = rx, ylim = ry, lwd = 2, col = 1, add = TRUE)
#
#
# to create a 3-d plot:
persp(surf$x, surf$y, surf$z)
# to get a contour:
contour(surf$x, surf$y, surf$z)
# to add a legend to the image:
image.plot(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)
#
# DESCRIPTION of image:
#   Creates an image, under some graphics devices, of shades
#   of gray or colors that represent a third dimension.
#
# USAGE:
#   image(x, y, z, zlim = range(z), add = F)
#
# REQUIRED ARGUMENTS:
# x: vector containing x coordinates of grid over which z is
#    evaluated. The values should be in increasing order;
#    missing values are not accepted.
# y: vector of grid y coordinates. The values should be in
#    increasing order; missing values are not accepted.
# z: is a matrix, z[i,j] is
#    evaluated at (x[i], y[j]). The rows of z are indexed by
#    x, and the columns by y. Missing values (NA's) are
#    allowed.
#####
#
# interp: Interpolates the value of the third variable onto an
#   evenly spaced grid of the first two variables. default is 40x40
#   grid

# to save the plot as a postscript file:
postscript(file = "example.ps")
image.plot(
  surf$x, surf$y, surf$z,
  add = FALSE, graphics.reset = TRUE, col = topo.colors(12)
)
dev.off()

# ---------------------------------------------------------------------------
# Using surface plotting
x1 <- 5:10
x2 <- 1:6
# f[i, j] = x1[i] + x2[j]
f <- outer(x1, x2, "+")
f
par(mfrow = c(2, 2))
image(x1, x2, f)
persp(x1, x2, f)
contour(x1, x2, f)
# an important list for surface plots
look <- list(x = x1, y = x2, z = f)
image(look)

# LOCATE POINTS IN A MAP
library(fields)
US()
# NOTE(review): the code below reads columns lon, lat and elev by name;
# presumably climate.txt provides them -- confirm the file layout.
climate <- read.table("climate.txt")
points(climate$lon, climate$lat, pch = "x")  # use "x" as the plot character
text(climate$lon, climate$lat, climate$elev)  # label by elevation
## xlim and ylim to indicate the limits of your usa graph
US(xlim = c(-82, -73), ylim = c(32.5, 41))
points(climate$lon, climate$lat, pch = "o")  # use "o" as the plot character
# label by city names but make the text 1/2 the default size.
# left justify the label
hold <- locator(1)  # now click on a point in the plot with the left button
hold  # gives you the coordinates
points(hold, pch = "X")
# text( hold$x, hold$y, "Here is my favorite place", adj=1)