#######################################################################
## Supplementary material:                                           ##
## A Smoothed ANOVA model for multivariate ecological regression     ##
## Marc Marí-Dell'Olmo(1,2,3,4), Miguel Ángel Martínez-Beneito(5,1), ##
## Mercè Gotsens(1,2,3), Laia Palència(1,2,3)                        ##
## 1 CIBER Epidemiología y Salud Pública (CIBERESP). Spain.          ##
## 2 Agència de Salut Pública de Barcelona. Barcelona.               ##
## 3 Institut d'Investigació Biomèdica (IIB Sant Pau). Barcelona.    ##
## 4 Doctorado en Biomedicina, UPF                                   ##
## 5 Centro Superior de Investigación en Salud Pública. Valencia.    ##
#######################################################################

# To install the INLA library:
#source("http://www.math.ntnu.no/inla/givemeINLA.R")

# Libraries
library(INLA)

# To upgrade INLA to the testing version:
#inla.upgrade(testing=TRUE)

# Working directory: files written with a relative path (e.g. the saved
# results) end up here.
setwd("C:/Estomago/Resultados/")

# Read the data.
# NOTE(review): this .RData file presumably provides the objects `grupo_e`
# and `indic` used below -- confirm against the data file.
load("C:/Estomago/Dades/barcelona_interna.RData")

# We obtain the observed (O) and the expected (E) cases for each area and period.
# The expected cases were calculated by indirect standardization.
# Observed (O) and expected (E) case counts, one entry per area and period
# (column 4 of the observed / expected tables stored in `grupo_e`).
O <- grupo_e[[2]][[1]][, 4]
E <- grupo_e[[2]][[2]][, 4]

# Covariate C (deprivation index), sorted by census-tract code (SECCION).
C <- indic$indice[order(indic$SECCION)]

# Period indicator (1 = 1st period, 2 = 2nd period).
Period <- grupo_e[[2]][[1]][[1]]

# Design columns for the two model constants:
#   mu1 is 1 in both periods (overall level),
#   mu2 is -1 in the 1st period and +1 in the 2nd (period contrast).
# The masks are computed once from Period so the recoding of one value can
# never interfere with the recoding of the other.
in.period.1 <- Period %in% 1
in.period.2 <- Period %in% 2
mu1 <- Period
mu1[in.period.1 | in.period.2] <- 1
mu2 <- Period
mu2[in.period.1] <- -1
mu2[in.period.2] <- 1

# Number of areas: the data hold every area twice, once per period.
n.areas <- length(O) / 2

# data.frame() silently recycles columns whose lengths divide evenly, so make
# sure every input really has the expected length before building the model
# data.
stopifnot(
  length(O) %% 2 == 0,
  length(E) == length(O),
  length(Period) == length(O),
  length(C) == n.areas
)

# Index for the random effects (the same area index in both periods).
region.p1.p2 <- rep(seq_len(n.areas), times = 2)

# Weights for the random effects: -1 for the first n.areas rows, +1 for the
# remaining ones.
# NOTE(review): this assumes the rows are ordered with all 1st-period records
# first, i.e. that W agrees with mu2 -- confirm against the data.
W <- rep(c(-1, 1), each = n.areas)

# The covariate C (deprivation) is categorized into 20 quantile groups (K = 20).
C.20 <- inla.group(C, n = 20, method = "quantile")
f.C.20 <- c(C.20, C.20)

# Final model data frame.
Model.Data <- data.frame(
  region.H.2 = region.p1.p2,
  region.H.1 = region.p1.p2,
  O = O,
  E = E,
  mu1 = mu1,
  mu2 = mu2,
  f.C.H.2 = f.C.20,
  f.C.H.1 = f.C.20,
  W = W
)

# We impose K = 20 restrictions: for each of the groups used to define f(C),
# the sum of the corresponding random effects is required to be 0.
# It can be shown that this set of restrictions leads to the orthogonality of
# f(C) and S.
# We build the matrix A and the vector e to add these extra constraints to the
# INLA model.
# Group index (1..20) of every area according to its deprivation quantile.
Dep.idx <- inla.group(C, n = 20, method = "quantile", idx.only = TRUE)

# Row m of A flags the areas that belong to deprivation group m, so that
# A %*% x = e (with e = 0) forces the effects of each group to sum to zero.
A <- matrix(0, nrow = 20, ncol = n.areas)
for (m in seq_len(20)) {
  A[m, which(Dep.idx == m)] <- 1
}

# The constraint matrix passed to the "bym" terms needs 2 * n.areas columns;
# only the first n.areas are constrained, the rest are padded with zeros.
A2 <- matrix(0, nrow = 20, ncol = n.areas)
A <- cbind(A, A2)
e <- rep(0, 20)

# A half-normal distribution with mean 0 and precision 0.0001 was assigned to
# the standard deviations of the random effects.
hyper.iid <- list(theta = list(prior = "logtgaussian", param = c(0, 0.0001)))
hyper.besag <- list(theta = list(prior = "logtgaussian", param = c(0, 0.0001)))

# Model formula: for both the covariate effect f(C) (rw1) and the spatial
# effect S (bym) there is an unweighted term (".H.1") and a term weighted by
# W (".H.2"); mu1 and mu2 are the model constants and the default intercept is
# removed.
formula1 <- O ~
  f(region.H.1,
    model = "bym",
    graph = "C:/Estomago/Resultados/Barcelona_nb.inla",
    constr = TRUE,
    extraconstr = list(A = A, e = e),
    hyper = c(hyper.iid, hyper.besag),
    rankdef = 21) +
  f(region.H.2, W,
    model = "bym",
    graph = "C:/Estomago/Resultados/Barcelona_nb.inla",
    constr = TRUE,
    extraconstr = list(A = A, e = e),
    hyper = c(hyper.iid, hyper.besag),
    rankdef = 21) +
  f(f.C.H.1, model = "rw1", hyper = hyper.iid) +
  f(f.C.H.2, W, model = "rw1", hyper = hyper.iid) +
  mu1 + mu2 - 1

# We have used the option model = "bym".
# This model is simply the sum of two effects, one with an intrinsic CAR prior
# (spatial model) and the other with independent values (iid model).
## Linear combinations used to obtain the values represented in the figures.

# Number of quantile groups (K) of the deprivation index; it must agree with
# the grouping used to build f.C.H.1 / f.C.H.2.
n.groups <- 20

# Deprivation-group index (1..K) of every area.
idx.Priv <- inla.group(C, n = n.groups, method = "quantile", idx.only = TRUE)

# Builds one linear combination per deprivation group m:
#   f.C.H.1[m] + w * f.C.H.2[m]
# `prefix` is the name prefix of the combinations; `w` is the period weight
# (-1 for the 1st period, +1 for the 2nd). Entries left as NA are not part of
# the combination. Returns a named list of linear combinations.
make.lc.rw1 <- function(prefix, w) {
  do.call(c, lapply(seq_len(n.groups), function(m) {
    aux <- rep(NA, n.groups)
    aux[m] <- 1
    lc <- inla.make.lincomb(f.C.H.2 = w * aux, f.C.H.1 = aux)
    names(lc) <- paste0(prefix, inla.num(m))
    lc
  }))
}

# Builds one linear combination per area i, adding the spatial terms to the
# covariate terms of the area's deprivation group g = idx.Priv[i]:
#   region.H.1[i] + w * region.H.2[i] + f.C.H.1[g] + w * f.C.H.2[g]
# Arguments and return value as in make.lc.rw1().
make.lc.rw1.S <- function(prefix, w) {
  do.call(c, lapply(seq_len(n.areas), function(i) {
    aux <- rep(NA, n.areas * 2)
    aux[i] <- 1
    aux.rw <- rep(NA, n.groups)
    aux.rw[idx.Priv[i]] <- 1
    lc <- inla.make.lincomb(region.H.2 = w * aux,
                            region.H.1 = aux,
                            f.C.H.2 = w * aux.rw,
                            f.C.H.1 = aux.rw)
    names(lc) <- paste0(prefix, inla.num(i))
    lc
  }))
}

# Linear combinations for figure 1(c) (1st period) and figure 1(d) (2nd period).
lc.rw1.p1 <- make.lc.rw1("lc.rw1.p1.", w = -1)
lc.rw1.p2 <- make.lc.rw1("lc.rw1.p2.", w = 1)

# Linear combinations for figure 2(a) (1st period) and figure 2(b) (2nd period).
lc.rw1.S.p1 <- make.lc.rw1.S("lc.rw1.S.p1.", w = -1)
lc.rw1.S.p2 <- make.lc.rw1.S("lc.rw1.S.p2.", w = 1)

all.lc <- c(lc.rw1.p1, lc.rw1.p2, lc.rw1.S.p1, lc.rw1.S.p2)

results <- inla(formula1,
                family = "poisson",
                data = Model.Data,
                E = E,
                lincomb = all.lc,
                control.compute = list(dic = TRUE, cpo = TRUE),
                control.predictor = list(compute = TRUE, cdf = c(log(1))),
                control.inla = list(lincomb.derived.only = TRUE,
                                    numint.maxfeval = 10^5,
                                    strategy = "laplace"),
                control.fixed = list(mean = 0, prec = 0.0001))

# A Laplace approximation has been used for approximating some marginal
# posterior densities. This is the most time consuming approximation but on
# the other hand is the most accurate (see Appendix paper).
# Posterior marginals for the hyperparameters have been obtained using the
# central composite design (CCD) strategy. This strategy needs much less
# computational time and the differences between the CCD and GRID strategies
# are minor. Moreover, the CCD strategy is recommended for problems with high
# dimensionality of the hyperparameter vector.

save(results, file = "results.RData")

###################################################################################
###################################################################################
# Decomposition of the variance of the logarithms of risk as the sum of the
# variances of all the model components.
# Each *.m vector holds the posterior-mean contribution of one component for
# every area in the 1st period followed by every area in the 2nd period.
# Components weighted by W in the model change sign between periods.

mu1.m <- rep(results$summary.fixed[1, "mean"], n.areas * 2)

mu2.mean <- results$summary.fixed[2, "mean"]
mu2.m <- c(-rep(mu2.mean, n.areas), rep(mu2.mean, n.areas))

RW.20 <- results$summary.random$f.C.H.1$mean
f1.m <- rep(RW.20[idx.Priv], 2)

RW.20.2 <- results$summary.random$f.C.H.2$mean
f2.m <- c(-RW.20.2[idx.Priv], RW.20.2[idx.Priv])

# Only the first n.areas values of each bym summary are used here.
S1.m <- rep(results$summary.random$region.H.1$mean[seq_len(n.areas)], 2)

# region.H.2 enters the model weighted by W (-1 in the 1st period), exactly
# like f.C.H.2 and mu2, so the same sign change is applied here. When these
# effects sum to zero the resulting variance is the same with or without the
# sign.
S2.mean <- results$summary.random$region.H.2$mean[seq_len(n.areas)]
S2.m <- c(-S2.mean, S2.mean)

# Percentage of the total variance explained by each component, in the order
# f1 (covariate), S1 (spatial), mu2 (period), f2 (covariate x period),
# S2 (spatial x period).
comp.var <- c(var(f1.m), var(S1.m), var(mu2.m), var(f2.m), var(S2.m))
(comp.var / sum(comp.var)) * 100