SUPPLEMENTARY MATERIALS

Proportion of symptoms in cases that are due to cancer

The CAPER study, which is the source of the data used here, recorded symptom presentations in primary care in cases over the 24 months before their cancer was diagnosed. The probability that a symptom in a patient with cancer was actually caused by cancer, which we can estimate directly from the CAPER study, does not correspond with what would be observed in a randomly chosen 2-year period. We have estimated what would be observed in practice, based on the following assumptions:

1. The mean sojourn time of both cancers is 4 years (see discussion).
2. The rate of symptom presentation in cases during months 25-36 prior to diagnosis is the same as during months 13-24.
3. The rate of symptom presentation in cases during months 37-48 is the same as in the controls; in other words, in cases none of the symptoms presenting in the first 12 months of the 4-year period are caused by cancer.

Our calculation assumes that the sojourn time of lung and colorectal cancer is 4 years in every individual. Although this is not plausible, our method will still give approximately correct answers as long as longer sojourn times are associated with proportionately longer SLTs. However, it should be noted that the relationship between SLT and sojourn time is not known.

WinBUGS code, data and initial values

Below we give the full WinBUGS model code, with initial values, and data for the “Any Symptoms” investigative criteria in colorectal cancer. Convergence, assessed using the Brooks-Gelman criteria (Brooks & Gelman, 1998), occurred within 5000 iterations. Accordingly, the first 10,000 samples were discarded, and we sampled 30,000 from each of two chains.
# WinBUGS model for symptom incidence in cases and controls.
#   g = 1..5  stage group (Dukes A, B, C, D, Missing - see DATA)
#   t = 1..ntp time period within the observed 24 months before diagnosis
# Summary nodes:
#   index 1-5  : per stage group, observed 24-month window
#   index 6    : average over stage groups, weighted by nca (anco by nco)
#   index 7-11 : per stage group, projected 4-year window (24 periods)
#   index 12   : nca-weighted average of indices 7-11
model{
   for (g in 1:5) {
      casy[g] <- sum(ca[g,])                         # total symptoms in cases over all periods
      casy[g] ~ dpois(thetaca[g])                    # likelihood cases
      thetaca[g] <- lamca[g] * nca[g]                # expected symptom count in cases
      lamca[g] ~ dunif(0,10)                         # 2-year incidence in cases
      cosy[g] ~ dpois(thetaco[g])                    # likelihood controls
      thetaco[g] <- lamco[g] * nco[g] * ntp          # expected symptom count in controls
      lamco[g] ~ dunif(0,10)                         # incidence in controls, per time interval
      pi[g,1:ntp] ~ ddirch(ca[g,1:ntp])              # share of case symptoms in each period
      for (t in 1:ntp) {
         lam[g,t] <- pi[g,t] * lamca[g]              # total incidence in cases
         lamc[g,t] <- max(0,lam[g,t] - lamco[g])     # cancer specific incidence (floored at 0)
         alpha[g,t] <- lamc[g,t]/sum(lamc[g,])       # SLT distribution
         pc[g,t] <- lamc[g,t] / lam[g,t]             # prob symptom in case caused by cancer
      }
   }

   for (g in 1:5) {
      # T[13:24] are the times to diagnosis of the 12 observed periods
      mslt[g] <- inprod2(T[13:24],alpha[g,1:12])         # mean SLT
      mpc[g] <- sum(lamc[g,1:12]) / sum(lam[g,1:12])     # mean pr(cancer-related)
      anca[g] <- 100*((thetaca[g] / 2) / nca[g])         # annual incidence cases
      anco[g] <- 100*((thetaco[g] / 2) / nco[g])         # annual incidence controls
   }
   mslt[6] <- inprod2(nca[1:5],mslt[1:5]) / sum(nca[1:5])   # average SLT all stages
   mpc[6] <- inprod2(nca[1:5],mpc[1:5]) / sum(nca[1:5])     # average pr(cancer-related)
   anca[6] <- inprod2(nca[1:5],anca[1:5]) / sum(nca[1:5])   # av annual incidence cases
   anco[6] <- inprod2(nco[1:5],anco[1:5]) / sum(nco[1:5])   # av annual incidence controls

   for (t in 1:ntp) {
      alpha[6,t] <- inprod(alpha[1:5,t],nca[1:5])/sum(nca[1:5])   # av SLT distribution
      pc[6,t] <- inprod(pc[1:5,t],nca[1:5])/sum(nca[1:5])         # av probability of ca
   }

   # Projection to a 4 year period (24 periods):
   #   periods 1-6   : no cancer-specific incidence; total incidence equals controls
   #   periods 7-12  : mean of the first six observed periods
   #   periods 13-24 : the 12 observed periods themselves
   for (g in 1:5) {
      for (t in 7:12) {
         xlamc[g,t] <- mean(lamc[g,1:6])             # cancer-specific incidence
         xlam[g,t] <- mean(lam[g,1:6])               # total incidence in cases
      }
      for (t in 1:6) {
         xlamc[g,t] <- 0
         xlam[g,t] <- lamco[g]
      }
      for (t in 13:24) {
         xlamc[g,t] <- lamc[g,t-12]
         xlam[g,t] <- lam[g,t-12]
      }
      for (t in 1:24) {
         xalpha[g,t] <- xlamc[g,t] / sum(xlamc[g,1:24])   # SLT distribution
      }
      mslt[g+6] <- inprod2(T[1:24],xalpha[g,1:24])
      mpc[g+6] <- sum(xlamc[g,1:24]) / sum(xlam[g,1:24])
   }
   mslt[12] <- inprod2(nca[1:5],mslt[7:11]) / sum(nca[1:5])   # cancer related slt 4y
   mpc[12] <- inprod2(nca[1:5],mpc[7:11]) / sum(nca[1:5])     # overall prob symptoms caused by cancer, 4yr
}

INITIAL VALUES
list(lamco=c(.1,.1,.1,.1,.1), lamca=c(.1,.1,.1,.1,.1))
list(lamco=c(1,1,1,1,1), lamca=c(1,1,1,1,1))

DATA
# any symptom
list(cosy=c(87,226,178,71,77),          # number of symptoms in controls, by stage: Dukes A,B,C,D,Missing
     ntp=12,                            # number of time periods
     nco=c(230,624,500,175,215),        # number of controls by stage
     nca=c(46,125,100,35,43),           # number of cases by stage
     T=c(47,45,43,41,39,37,
         35,33,31,29,27,25,
         23,21,19,17,15,13,
         11,9,7,5,3,1),                 # average time to diagnosis for each time period
     # ca: number of symptoms in cases in each time period, one row per stage
     # NOTE(review): the 0.1 in the last row presumably stands in for an observed
     # zero so that the ddirch parameter stays positive - confirm with the authors.
     ca=structure(.Data=c(5,2,4,3,3,1,   6,5,7,5,16,22,        # Dukes A
                          6,5,6,7,4,9,   5,14,16,21,42,77,     # Dukes B
                          5,6,3,6,6,8,   5,5,10,11,37,57,      # Dukes C
                          2,1,1,2,4,7,   3,1,3,8,9,25,         # Dukes D
                          3,3,0.1,1,2,4, 4,2,4,7,8,29),        # Missing
                  .Dim=c(5,12)))

Reference
Brooks SP, Gelman A (1998) Alternative methods for monitoring convergence of iterative simulations. Journal of Computational and Graphical Statistics 7: 434-455