# 12/12/2012 prepared by John Xie

# (web-page extraction residue: "advertisement")

#####################################################

# 12/12/2012 prepared by John Xie #

#####################################################

# Demonstration of basic applications of #

# HHT algorithm in public health research. #

#####################################################

# Import the data: restore the objects saved in the Chicago data file
# into the workspace.
load(file = "ChicagoData.rda")

# Read in the R functions of the HHT algorithm.
source(file = "HHT-R-code-XIE-Dec2012.txt")

#----------------------------------------------------

# 1) EMD decomposition and significance test
#
# 'dailydeath' is the daily mortality time series of Chicago over
# the period 01/01/1995 to 30/06/2000.

# Run the decomposition once with the default settings.
EEMDR(dailydeath)

# Run it again with graphout = 0 (presumably this turns the graphical
# output off -- confirm against the sourced HHT code) and keep the result.
imf.dd <- EEMDR(dailydeath, graphout = 0)

# Columns 2 to 9 of 'allmode' are passed on to the significance test.
modes <- imf.dd$allmode[, 2:9]

testimf(modes)

#-----------------

# 2) Ensemble EMD (EEMD) decomposition and significance test

# EEMD runs n* = 100 EMD decomposition processes with added white noise
# and then takes the average as the final decomposition result (i.e. the
# generated IMF-like components). Therefore, this step takes a few minutes
# to complete.
imf.dds <- EEMDR(dailydeath, 0.1, 100)

smodes <- imf.dds$allmode[, 2:9]

testimf(smodes)

# Unfortunately, the components produced by the EEMD process are not
# guaranteed to be true IMFs, so they need a post-processing treatment.
# The results given by 'testimf(smodes)' above are therefore not valid,
# because the Hilbert Transform is only applicable to IMFs.
#
# In the post-processing step, the EEMD-generated 'IMFs' are used as the
# input series of a final EMD process that generates the IMFs for the
# subsequent analysis.
#
# EEMD results, and hence the post-processing results, change each time
# because of the random fluctuation caused by the added white noise.
set.seed(101) # for repeatability

imf.ddp <- postEEMD(dailydeath, 0.1, 60)

postmodes <- imf.ddp$postmode[, 2:9]

testimf(postmodes)

# Note that there are eight IMFs in the EMD decomposition, but there are
# nine IMFs generated after the EEMD step. In general, the number of IMFs
# is equal to or less than log2(n) - 1, where n is the sample size or data
# length. Since the non-trend IMFs can only come from the lower-order
# (i.e. higher-frequency) IMFs, we only check the first eight IMFs in the
# EEMD step.

# Compare the EMD and the post-EEMD significance tests in two stacked
# panels. The previous graphical parameters are saved in 'op' and restored
# afterwards. (In the original text the 'par' calls were fused onto the
# neighbouring lines and never ran.)
op <- par(mfcol = c(2, 1), mar = c(2.5, 3, 1.5, 1), mgp = c(1.5, 0.5, 0))

# Top panel: plain EMD components ('modes' from step 1), matching the
# panel title. The raw EEMD components ('smodes') are not valid input for
# this comparison, as explained above.
testimf(modes)
title("significance test: EMD results")

# Bottom panel: post-processed EEMD components.
testimf(postmodes)
title("significance test: post EEMD results")

par(op)

# The huge difference between the EMD results and the post-EEMD results
# indicates that the mode mixing issue cannot be ignored. Hence, EEMD is
# necessary.

#-------------------------------------------

# 3) Determination of the non-trend IMFs.

# A visual inspection indicates that the first five IMFs may be considered
# as the non-trend IMFs. Do the formal significance test as follows:
#   * nfit = k means that the first k IMFs are the ones being considered;
#   * wnoise = 0 means that the white noise assumption is taken not to
#     hold (the default setting is wnoise = 1).
testimf(postmodes, nfit = 3, wnoise = 0) # start from the first three IMFs;
testimf(postmodes, nfit = 4, wnoise = 0) # include one more IMF each time;
testimf(postmodes, nfit = 5, wnoise = 0) # up to the first five IMFs.

# All three tests confirm that the first five IMFs are the non-trend
# components, so they are identified as the non-trend IMFs.

# The white noise assumption is not true in this case; this can be
# verified by checking the acf plot of the identified non-trend IMFs.
#
# The non-trend series is built from the post-EEMD components
# (imf.ddp$postmode), i.e. the same components that were tested above and
# that the repeated calculation in step 4 uses. The original script summed
# the plain EMD components (imf.dd$allmode) here, which does not match the
# IMFs identified by the tests.
# NOTE(review): printed results quoted elsewhere in this script (e.g. the
# excess mortality figure in step 4) should be re-checked after this change.
Dnontrend <- imf.ddp$postmode[, 2] + imf.ddp$postmode[, 3] +
  imf.ddp$postmode[, 4] + imf.ddp$postmode[, 5] + imf.ddp$postmode[, 6]

acf(Dnontrend)

#-----------------

# 4) Calculation of the excess mortality for the 1995 Chicago heat wave
#    period (11-18 July, 1995, 8 days).

# Position index of the 1995 heat wave period within the daily series.
heatwave95 <- 192:199

# Check that the index really covers the heat wave period.
year55[heatwave95]
# [1] 1995 1995 1995 1995 1995 1995 1995 1995

month55[heatwave95]
# [1] 7 7 7 7 7 7 7 7

date55[heatwave95]
# [1] 11 12 13 14 15 16 17 18

# The excess mortality is then simply the sum of the non-trend series over
# those eight days.
round(sum(Dnontrend[heatwave95]))
# [1] 477

# Because this result is subject to sampling variation, the same
# calculation is repeated 20 times and the average is taken as the
# reported result, as was done in the paper.
#
# (Completing these 20 calculations took about 30 minutes on a relatively
# old laptop computer.)
set.seed(101) # for result repeatability

heatwave95 <- 192:199
extrahwD95 <- numeric(20) # one excess-mortality estimate per repetition

for (i in 1:20) {
  imf.ddpc <- postEEMD(dailydeath, 0.1, 60)

  # Sum of the first five post-EEMD IMFs (columns 2 to 6 of 'postmode').
  drandp <- imf.ddpc$postmode[, 2] + imf.ddpc$postmode[, 3] +
    imf.ddpc$postmode[, 4] + imf.ddpc$postmode[, 5] + imf.ddpc$postmode[, 6]

  extrahwD95[i] <- round(sum(drandp[heatwave95]), 2)
}

# Reported value (rounded mean) and the spread over the 20 repetitions.
round(mean(extrahwD95))
summary(extrahwD95)

#------------------------

# 5) Calculation of the average period for each IMF (in days)

set.seed(101) # for repeatability

imf.ddp <- postEEMD(dailydeath, 0.1, 60)

# Columns 2 to 10 of 'postmode' are used as the nine IMFs.
allimf <- imf.ddp$postmode[, 2:10]

datalength <- length(dailydeath)

# Average period of an IMF = data length / number of peaks (maxima).
# 'length(extre$maxindex[, 1])' gives the number of peaks found by the
# function 'extrema'.
aveP <- numeric(9)

for (i in 1:9) {
  extre <- extrema(allimf[, i])
  midP <- datalength / length(extre$maxindex[, 1])
  aveP[i] <- midP
}

aveP # check the results

# Round the results to the nearest integers (days).
aveP <- round(aveP)

for (i in 1:9) {
  cat("The average period for the", i, "th IMF is:", aveP[i], "days \n")
}

#####################################################################################

# THE END #

#####################################################################################

# (web-page extraction residue: "Download")