#####################################################
# 12/12/2012 prepared by John Xie #
#####################################################
# Demonstration of basic applications of #
# HHT algorithm in public health research. #
#####################################################
# import data
# upload R functions of HHT algorithm
load("ChicagoData.rda")
source("HHT-R-code-XIE-Dec2012.txt")
#----------------------------------------------------
# 'dailydeath' is the daily mortality time series of Chicago over
# the period 01/01/1995 to 30/06/2000.
# 1) EMD decomposition and significance test
# Plain EMD run with the function's default arguments.
# NOTE(review): presumably this call draws the decomposition plots, since the
# next call switches graphical output off explicitly -- confirm in the HHT code.
EEMDR(dailydeath)
# Same decomposition with graphout = 0, keeping the returned object.
imf.dd <- EEMDR(dailydeath, graphout = 0)
# Columns 2 to 9 of 'allmode' (the eight IMFs of the EMD decomposition) are
# the input to the significance test.
modes <- imf.dd$allmode[, 2:9]
testimf(modes)
#-----------------
# 2) Ensemble EMD (EEMD) decomposition and significance test
# EEMD repeats the EMD decomposition n* = 100 times, each time with added
# white noise, and then takes the average over the runs as the final
# decomposition result (i.e. the generated IMF-like components).
# Because of the repeated runs, this step takes a few minutes to complete.
# NOTE(review): the second argument (0.1) is presumably the amplitude of the
# added noise -- confirm against the EEMDR definition.
imf.dds <- EEMDR(dailydeath, 0.1, 100)
# Keep columns 2 to 9 of 'allmode' and run the significance test on them.
smodes <- imf.dds$allmode[, 2:9]
testimf(smodes)
# Unfortunately, the decomposed results by EEMD process do not
# guarantee they are the true IMFs. We need to do the post processing
# treatment on the EEMD generated IMF-like components.
# Therefore, the results given by 'testimf(smodes)' are not valid because
# the Hilbert Transform is only applicable to IMFs.
# In the post processing step, the EEMD generated 'IMFs' are used
# as the input series to go through a final EMD process to generate the
# IMFs for subsequent analysis.
# EEMD results, and hence the post-processing results, change each time
# because of the random fluctuation introduced by the added white noise.
# Fix the random seed so that the noise-dependent results can be repeated.
set.seed(101)
# EEMD followed by the post-processing EMD step.
imf.ddp <- postEEMD(dailydeath, 0.1, 60)
# Columns 2 to 9 of 'postmode' go into the significance test.
postmodes <- imf.ddp$postmode[, 2:9]
testimf(postmodes)
# Note that there are eight IMFs in the EMD decomposition, but
# there are nine IMFs generated after the EEMD step. In general,
# the number of IMFs is equal to or less than log2(n) - 1, where n
# is the sample size or data length. Since the non-trend IMFs can
# only come from the lower order (i.e. higher frequency) IMFs,
# we only check the first eight IMFs in the EEMD step.
#
# Draw the two significance-test plots stacked in a single figure.
# par() returns the previous graphical settings, which are saved in 'op'
# and restored once both plots have been drawn.
op <- par(mfcol = c(2, 1), mar = c(2.5, 3, 1.5, 1), mgp = c(1.5, 0.5, 0))
# NOTE(review): the title below says "EMD results", but 'smodes' holds the
# EEMD output; the plain EMD IMFs are stored in 'modes'. Confirm which of
# the two was intended for this comparison.
testimf(smodes)
title("significance test: EMD results")
testimf(postmodes)
title("significance test: post EEMD results")
par(op)
# The large difference between the EMD results and the post-EEMD results
# indicates that the mode-mixing issue cannot be ignored. Hence, EEMD is necessary.
#-------------------------------------------
# 3) Determination of the non-trend IMFs.
# A visual inspection indicates that the first five IMFs may be considered
# as the non-trend IMFs. The formal significance test is run three times,
# starting from the first three IMFs and including one more IMF each time.
#   nfit   = k : we believe that the first k IMFs need to be considered;
#   wnoise = 0 : the white noise assumption was found not to hold
#                (the default setting is wnoise = 1).
testimf(postmodes, nfit = 3, wnoise = 0)
testimf(postmodes, nfit = 4, wnoise = 0)
testimf(postmodes, nfit = 5, wnoise = 0)
# All three tests confirm that the first five IMFs are the non-trend
# components, so these five are identified as the non-trend IMFs.
# The white noise assumption does not hold in this case; this can be verified
# by inspecting the acf plot of the sum of the identified non-trend IMFs.
# NOTE(review): the sum below uses 'imf.dd' (plain EMD output), whereas the
# tests that identified the non-trend IMFs were run on the post-EEMD modes
# ('imf.ddp$postmode') -- confirm that 'imf.dd' is the intended source.
Dnontrend <- imf.dd$allmode[, 2] + imf.dd$allmode[, 3] +
  imf.dd$allmode[, 4] + imf.dd$allmode[, 5] + imf.dd$allmode[, 6]
acf(Dnontrend)
#-----------------
# 4) Calculation of the excess mortality for the 1995 Chicago heat wave period
# (11-18 July, 1995, 8 days).
# create the correct position index for the 1995 heat wave period:
# Positions 192 to 199 of the daily series (eight consecutive days).
heatwave95 <- 192:199
# Print year, month and day at these positions to confirm that the index
# really covers 11-18 July 1995; the expected output follows each call.
year55[heatwave95]
# [1] 1995 1995 1995 1995 1995 1995 1995 1995
month55[heatwave95]
# [1] 7 7 7 7 7 7 7 7
date55[heatwave95]
# [1] 11 12 13 14 15 16 17 18
# The excess mortality is then the sum of the non-trend component over the
# heat wave days, rounded to a whole number of deaths.
round(sum(Dnontrend[heatwave95]))
# [1] 477
# Because this result is subject to sampling variation, we repeat the same calculation
# procedure 20 times and take the average as our reported result, as we did in this paper.
#
# The R code is given as follows. (Completing these 20 calculations took us about 30 minutes
# on a relatively old laptop computer.)
# Repeat the post-EEMD excess-mortality calculation several times and
# summarise the results, because each run depends on the added white noise.
set.seed(101) # for result repeatability
n_rep <- 20L # number of repeated post-EEMD runs
heatwave95 <- 192:199 # positions of the heat wave days in the series
# Preallocate the result vector instead of growing it inside the loop.
extrahwD95 <- numeric(n_rep)
for (i in seq_len(n_rep)) {
  imf.ddpc <- postEEMD(dailydeath, 0.1, 60)
  # Sum of columns 2 to 6 of 'postmode', i.e. the five non-trend IMFs.
  drandp <- imf.ddpc$postmode[, 2] + imf.ddpc$postmode[, 3] +
    imf.ddpc$postmode[, 4] + imf.ddpc$postmode[, 5] + imf.ddpc$postmode[, 6]
  # Excess mortality of this run, kept to two decimals.
  extrahwD95[i] <- round(sum(drandp[heatwave95]), 2)
}
# Reported value (mean over the runs, rounded) and the spread of the runs.
round(mean(extrahwD95))
summary(extrahwD95)
#------------------------
# 5) Calculation of the average period for each IMF (in days)
# Average period of each IMF = data length / number of peaks of that IMF.
set.seed(101) # for repeatability
imf.ddp <- postEEMD(dailydeath, 0.1, 60)
# Columns 2 to 10 of 'postmode' hold the nine IMFs.
allimf <- imf.ddp$postmode[, 2:10]
datalength <- length(dailydeath)
# Take the IMF count from the data instead of hard-coding it in each loop.
n_imf <- ncol(allimf)
# Preallocate the result vector instead of growing it inside the loop.
aveP <- numeric(n_imf)
for (i in seq_len(n_imf)) {
  # Find the peaks of the i-th IMF from the output of function 'extrema'.
  extre <- extrema(allimf[, i])
  # 'length(extre$maxindex[, 1])' is the number of peaks (i.e. maxima).
  aveP[i] <- datalength / length(extre$maxindex[, 1])
}
aveP # check the results
aveP <- round(aveP) # round the results to the nearest integers (days)
for (i in seq_len(n_imf)) {
  cat("The average period for the", i, "th IMF is:", aveP[i], "days \n")
}
#####################################################################################
# THE END #
#####################################################################################