## Supplementary material: A Smoothed ANOVA model for multivariate ecological regression

#######################################################################
## Supplementary material:
##
## A Smoothed ANOVA model for multivariate ecological regression
##
## Marc Marí-Dell'Olmo(1,2,3,4), Miguel Ángel Martínez-Beneito(5,1), ##
## Mercè Gotsens(1,2,3), Laia Palència(1,2,3)
##
## 1 CIBER Epidemiología y Salud Pública (CIBERESP). Spain.
##
## 2 Agència de Salut Pública de Barcelona. Barcelona.
##
## 3 Institut d'Investigació Biomèdica (IIB Sant Pau). Barcelona.
##
## 4 Doctorado en Biomedicina, UPF
##
## 5 Centro Superior de Investigación en Salud Pública. Valencia.
##
#######################################################################
#For installation INLA library
#source("http://www.math.ntnu.no/inla/givemeINLA.R")
#libraries
library(INLA)
#We use to upgrade INLA to the testing-version
#inla.upgrade(testing=TRUE)
#Working directory (machine-specific path; results.RData is later saved relative to it)
setwd("C:/Estomago/Resultados/")
#Read data (the objects grupo_e and indic used below are presumably provided by this file)
load("C:/Estomago/Dades/barcelona_interna.RData")
#We obtain the observed (O) and the expected (E) cases for each area and period
#The expected cases were calculated by indirect standardization.
O <- grupo_e[[2]][[1]][, 4]
E <- grupo_e[[2]][[2]][, 4]
#We obtain the covariate C (deprivation index), sorted by SECCION
C <- indic$indice[order(indic$SECCION)]
#We obtain the period variable (1 = 1st period, 2 = 2nd period)
Period <- grupo_e[[2]][[1]][[1]]
#Number of areas (the data hold two stacked periods)
n.areas <- length(O) / 2
#Everything below (W, region.p1.p2, f.C.20) assumes that the first n.areas rows belong to
#period 1 and the last n.areas rows to period 2, with one covariate value per area.
#Fail early if the loaded data do not have that layout instead of fitting a wrong model.
stopifnot(
  length(O) %% 2 == 0,
  length(E) == length(O),
  length(Period) == length(O),
  length(C) == n.areas,
  all(Period == rep(c(1, 2), each = n.areas))
)
#we create the variables to obtain the model constants mu1 and mu2
#mu1 is 1 in both periods; mu2 is -1 in the 1st period and 1 in the 2nd period
mu1 <- rep(1, length(Period))
mu2 <- ifelse(Period == 1, -1, 1)
#Index for the random effects (the same area index is repeated for both periods)
region.p1.p2 <- rep(seq_len(n.areas), times = 2)
#Weights for the random effects (-1 for the 1st period, 1 for the 2nd period)
W <- rep(c(-1, 1), each = n.areas)
#We have categorized the covariate C (Deprivation) into 20 quantile-groups (K=20)
C.20 <- inla.group(C, n = 20, method = "quantile")
f.C.20 <- c(C.20, C.20)
#Final model data frame
#Each index is stored twice (suffix .H.1 and .H.2) because the model formula uses one
#unweighted and one W-weighted f() term per index.
Model.Data <- data.frame(
  region.H.2 = region.p1.p2, region.H.1 = region.p1.p2, O = O, E = E,
  mu1 = mu1, mu2 = mu2, f.C.H.2 = f.C.20, f.C.H.1 = f.C.20, W = W
)
#We are imposing K=20 restrictions:
#for each one of the groups that we have used to define f(C), we require the sum of the
#corresponding random effects to be 0.
#It can be shown that this set of restrictions leads to the orthogonality of f(C) and S.
#We make the matrix A and the vector e to add extra constraints into the INLA model.
Dep.idx <- inla.group(C, n = 20, method = "quantile", idx.only = TRUE)
#Row m of A flags with a 1 the areas whose deprivation group index equals m
A <- matrix(0, nrow = 20, ncol = n.areas)
for (m in seq_len(20)) {
  A[m, which(Dep.idx == m)] <- 1
}
#A is padded with n.areas zero columns, so every constraint row has 2*n.areas entries
A2 <- matrix(0, nrow = 20, ncol = n.areas)
A <- cbind(A, A2)
#Right-hand side of the constraints: every group sum must equal 0
e <- rep(0, 20)
# A half-normal distribution with mean 0 and precision 0.0001 was assigned to the
# standard deviations of the random effects
# The same prior specification is used for both hyperparameter lists.
hyper.iid <- list(theta = list(prior = "logtgaussian", param = c(0, 0.0001)))
hyper.besag <- list(theta = list(prior = "logtgaussian", param = c(0, 0.0001)))
#Model formula
#Two "bym" terms on the area index (region.H.1 unweighted, region.H.2 weighted by W),
#two "rw1" terms on the deprivation group (f.C.H.1 unweighted, f.C.H.2 weighted by W)
#and the constants mu1 and mu2, without an intercept.
#The "- 1" must be part of the formula expression: every continued line ends with an
#operator. On a line of its own R parses "-1" as a separate statement, the intercept is
#kept, and mu1 and mu2 are no longer rows 1 and 2 of the fixed effects.
formula1 <- O ~
  f(region.H.1, model="bym",
    graph="C:/Estomago/Resultados/Barcelona_nb.inla", constr=TRUE,
    extraconstr=list(A=A, e=e), hyper=c(hyper.iid, hyper.besag), rankdef=21) +
  f(region.H.2, W, model="bym",
    graph="C:/Estomago/Resultados/Barcelona_nb.inla", constr=TRUE,
    extraconstr=list(A=A, e=e), hyper=c(hyper.iid, hyper.besag), rankdef=21) +
  f(f.C.H.1, model="rw1", hyper=hyper.iid) +
  f(f.C.H.2, W, model="rw1", hyper=hyper.iid) +
  mu1 + mu2 - 1
#We have used the option model="bym"
#This model is simply the sum of two effects, one with an Intrinsic CAR prior (spatial
#model) and the other with independent values (iid model).
##Linear combinations to obtain the values represented into the figures
#Builds the one-element named linear combination for deprivation group m:
#weight 1 on f.C.H.1[m] and weight sign.H.2 on f.C.H.2[m]; all other weights are NA.
make.rw1.lincomb <- function(m, sign.H.2, prefix) {
  pick <- rep(NA, 20)
  pick[m] <- 1
  one.lc <- inla.make.lincomb(f.C.H.2 = sign.H.2 * pick, f.C.H.1 = pick)
  names(one.lc) <- paste(prefix, inla.num(m), sep="")
  one.lc
}
#Linear combination to represent the figure 1(c): f.C.H.1 - f.C.H.2 for each group
lc.rw1.p1 <- do.call(c, lapply(1:20, make.rw1.lincomb,
                               sign.H.2 = -1, prefix = "lc.rw1.p1."))
#Linear combination to obtain figure 1(d): f.C.H.1 + f.C.H.2 for each group
lc.rw1.p2 <- do.call(c, lapply(1:20, make.rw1.lincomb,
                               sign.H.2 = 1, prefix = "lc.rw1.p2."))
#Deprivation group index of every area (computed once and shared by both figures)
idx.Priv <- inla.group(C, n = 20, method = "quantile", idx.only = TRUE)
#Builds the one-element named linear combination for area i:
#weight 1 on region.H.1[i] and on f.C.H.1 of the area's deprivation group, and weight
#sign.H.2 on the matching region.H.2 and f.C.H.2 entries; all other weights are NA.
#The region weight vectors have length 2*n.areas and only position i (first half) is set.
make.area.lincomb <- function(i, sign.H.2, prefix) {
  area.w <- rep(NA, n.areas * 2)
  area.w[i] <- 1
  group.w <- rep(NA, 20)
  group.w[idx.Priv[i]] <- 1
  one.lc <- inla.make.lincomb(
    region.H.2 = sign.H.2 * area.w, region.H.1 = area.w,
    f.C.H.2 = sign.H.2 * group.w, f.C.H.1 = group.w
  )
  names(one.lc) <- paste0(prefix, inla.num(i))
  one.lc
}
#Linear combination to obtain figure 2(a): (.H.1 terms) - (.H.2 terms) for each area
lc.rw1.S.p1 <- list()
for (i in seq_len(n.areas)) {
  lc.rw1.S.p1 <- c(lc.rw1.S.p1, make.area.lincomb(i, -1, "lc.rw1.S.p1."))
}
#Linear combination to obtain figure 2(b): (.H.1 terms) + (.H.2 terms) for each area
lc.rw1.S.p2 <- list()
for (i in seq_len(n.areas)) {
  lc.rw1.S.p2 <- c(lc.rw1.S.p2, make.area.lincomb(i, 1, "lc.rw1.S.p2."))
}
#All linear combinations are passed to INLA in a single list
all.lc <- c(lc.rw1.p1, lc.rw1.p2, lc.rw1.S.p1, lc.rw1.S.p2)
#Poisson model with the expected cases E; DIC and CPO are requested, and the linear
#predictor is computed together with its cdf at log(1) = 0.
results <- inla(formula1, family = "poisson", data = Model.Data, E = E,
  lincomb = all.lc,
  control.compute = list(dic = TRUE, cpo = TRUE),
  control.predictor = list(compute = TRUE, cdf = c(log(1))),
  control.inla = list(lincomb.derived.only = TRUE, numint.maxfeval = 10^5,
    strategy = "laplace"),
  control.fixed = list(mean = 0, prec = 0.0001))
#A Laplace approximation has been used for approximating some marginal posterior
#densities. This is the most time consuming approximation but on the other hand is the
#most accurate (see Appendix paper).
#Posterior marginals for the hyperparameters have been obtained using central composite
#design (CCD) strategy. This strategy needs much less computational time and the
#differences between CCD and GRID strategy are minor. Moreover, the CCD strategy is
#recommended for problems with high dimensionality of the hyperparameter vector.
#The fitted object is written to the current working directory
save(results, file="results.RData")
###################################################################################
###################################################################################
#Decomposition of the variance of the logarithms of risk as the sum of the variances of
#all the model components
#Fixed effects are read by row name, so the values stay correct whatever the order
#(or number) of rows in results$summary.fixed.
mu1.hat <- results$summary.fixed["mu1", "mean"]
mu2.hat <- results$summary.fixed["mu2", "mean"]
#Every component is stacked as (1st period, 2nd period), n.areas values each
mu1.m <- rep(mu1.hat, n.areas * 2)
mu2.m <- c(-rep(mu2.hat, n.areas), rep(mu2.hat, n.areas))
#Posterior means of the rw1 effects, expanded from the 20 groups to the areas
RW.20 <- results$summary.random$f.C.H.1$mean
f1.m <- rep(RW.20[idx.Priv], 2)
RW.20.2 <- results$summary.random$f.C.H.2$mean
f2.m <- c(-RW.20.2[idx.Priv], RW.20.2[idx.Priv])
#Posterior means of the bym effects (only the first n.areas entries are used)
S1.m <- rep(results$summary.random$region.H.1$mean[1:n.areas], 2)
#NOTE(review): unlike mu2.m and f2.m, S2.m is not sign-flipped for the 1st period;
#confirm that this is intended (kept as in the original computation).
S2.m <- rep(results$summary.random$region.H.2$mean[1:n.areas], 2)
#Percentage of the total variance attributed to f1, S1, mu2, f2 and S2 (in this order)
component.var <- c(var(f1.m), var(S1.m), var(mu2.m), var(f2.m), var(S2.m))
(component.var / sum(component.var)) * 100