# Uploaded by Richard Goza

# Predictive analysis

# advertisement
# Install and load required libraries
if(!require(forecast)) install.packages("forecast")
if(!require(zoo)) install.packages("zoo")
if(!require(tidyverse)) install.packages("tidyverse")
if(!require(fpp2)) install.packages("fpp2")
if (!require(xts)) install.packages("xts")
if (!require(forecastHybrid)) install.packages("forecastHybrid")
library(forecast)
library(zoo)
library(tidyverse)
library(fpp2)
library(xts)
library(forecastHybrid)
# Import the data: ask the user to pick a CSV file interactively and read it
# with a header row, keeping text columns as character vectors.
ST <- read.csv(
  file = file.choose(),
  header = TRUE,
  stringsAsFactors = FALSE
)
# Convert the ShiftDate text (month/day/year hour:minute) to Date values.
ST[["ShiftDate"]] <- as.Date(ST[["ShiftDate"]], format = "%m/%d/%Y %H:%M")
# Inspect the first rows, the last rows and the per-column summary.
head(ST)
tail(ST)
summary(ST)
# Create the time series object from the SpotTime column, 365 observations
# per unit of time.
# NOTE(review): this call was previously written with start = c(2021, 12, 25).
# ts() only reads the first two elements of `start`, so the series begins at
# period 12 of 2021 either way -- confirm whether 25 Dec 2021 was intended.
st.ts <- ts(ST$SpotTime, frequency = 365, start = c(2021, 12))
st_summary <- summary(st.ts)
st_summary
length(st.ts)
str(st.ts)
# Element 1 of the summary is the minimum, element 6 the maximum.
st_summary[1]
st_summary[6]
# Plot limits: a little below the minimum and a fixed margin above the maximum.
ylow <- round(st_summary[1] - 5)
yhigh <- st_summary[6] + 17.56
paste(ylow, yhigh)
# Plot the raw time series against calendar dates.
colors <- c("blue", "red")
# NOTE(review): the date axis starts on 2021-12-01, while st.ts is created with
# its own `start`; confirm both describe the same first observation.
time_vector <- seq(as.Date("2021-12-01"), by = "days", length.out = length(st.ts))

# Draw the SpotTime line chart shared by the two plots below.
#   dates    - x positions, one per observation
#   series   - SpotTime values to draw
#   y_limits - length-2 numeric vector passed to `ylim`
#   line_col - colour of the series line
plot_spot_time <- function(dates, series, y_limits, line_col) {
  plot(dates,
       series,
       ylim = y_limits,
       ylab = "Duration in Seconds",
       xlab = "Time Horizon",
       col = line_col,
       main = "SpotTime Time Series",
       type = "l",
       lwd = 2)
}

plot_spot_time(time_vector, st.ts, c(ylow, yhigh), colors[1])
# One legend entry, so pass exactly one colour.
legend("topright", legend = "SpotTime", col = colors[1], lwd = 2)
grid()

# Quick plot to see the moving average: a trailing 7-observation mean, padded
# with NA at the start so it stays the same length as the series.
window_size <- 7
moving_avg <- rollapply(st.ts, width = window_size, FUN = mean, align = "right", fill = NA)
plot_spot_time(time_vector, st.ts, c(ylow, yhigh), colors[1])
lines(time_vector, moving_avg, col = colors[2], lwd = 2)
legend("topright", legend = c("SpotTime", "Moving Avg"), col = colors, lwd = 2)
grid()
# Quick ggplot views of several forecast models fitted to the full series.
# Each forecast object is created here so that it exists when it is plotted.
# NOTE(review): the 12-step horizon mirrors the validation length used later
# in this script; adjust if a different look-ahead is wanted.
quick_h <- 12
forecast_ets <- forecast(ets(st.ts), h = quick_h)
forecast_arima <- forecast(auto.arima(st.ts), h = quick_h)
forecast_nnar <- forecast(nnetar(st.ts), h = quick_h)
autoplot(forecast_ets)
autoplot(forecast_arima)
autoplot(forecast_nnar)
# Split the series into training and validation sets: the last nValid
# observations are held out, everything before them is used for training.
nValid <- 12
nTrain <- length(st.ts) - nValid
if (nTrain < window_size) {
  stop("Series is too short for a ", window_size,
       "-observation moving average plus ", nValid,
       " validation points.", call. = FALSE)
}
# Cut by the series' own time stamps rather than by hard-coded (year, period)
# pairs, so the split is correct whatever `start` the series was given.
obs_times <- time(st.ts)
train.ts <- window(st.ts, end = obs_times[nTrain])
valid.ts <- window(st.ts, start = obs_times[nTrain + 1])

# Naive moving-average forecast: carry the last trailing mean of the TRAINING
# data forward as a flat line. Using the full series here would leak
# validation observations into the forecast.
moving_avg <- rollapply(train.ts, width = window_size, FUN = mean, align = "right", fill = NA)
last.ma <- as.numeric(tail(moving_avg, 1))
# Place the flat forecast on exactly the same time stamps as valid.ts so the
# element-wise error below lines up.
moving_avg.pred <- ts(rep(last.ma, nValid), start = tsp(valid.ts)[1],
                      frequency = frequency(st.ts))

# Plot training data, its moving average, the flat forecast and the actuals.
# Axis range and ticks come from the data so the validation period is visible.
x_range <- range(time(st.ts))
x_ticks <- pretty(x_range)
plot(train.ts, ylim = c(ylow, yhigh), ylab = "SpotTime in Sec", xlab = "Time Horizon",
     bty = "l", xaxt = "n", xlim = x_range, main = "Actual VS Naive Forecast")
axis(1, at = x_ticks, labels = format(x_ticks))
lines(moving_avg, lwd = 2, col = "blue")
lines(moving_avg.pred, lwd = 2, col = "green", lty = 2)
lines(valid.ts)

# Accuracy of the naive forecast on the validation set.
naive_error <- valid.ts - moving_avg.pred
mae <- mean(abs(naive_error), na.rm = TRUE)
mse <- mean(naive_error^2, na.rm = TRUE)
rmse <- sqrt(mse)
mape_naive <- mean(abs(naive_error / valid.ts) * 100, na.rm = TRUE)
cat("Mean Absolute Error (MAE):", mae, "\n")
cat("Mean Squared Error (MSE):", mse, "\n")
cat("Root Mean Squared Error (RMSE):", rmse, "\n")
cat("Mean Absolute Percentage Error (MAPE) for Naive Forecast:", mape_naive, "%\n")
checkresiduals(naive(st.ts))
# Perform exponential smoothing forecast: fit ETS on the training set and
# forecast nValid steps ahead; the point forecasts are in `$mean`.
forecast_model <- ets(train.ts)
forecast_values <- forecast(forecast_model, h = nValid)
forecasted_ts <- forecast_values$mean

# Accuracy of the ETS forecast on the validation set. The errors get their own
# name so they do not overwrite the naive-forecast errors.
ets_error <- valid.ts - forecasted_ts
mae <- mean(abs(ets_error), na.rm = TRUE)
mse <- mean(ets_error^2, na.rm = TRUE)
rmse <- sqrt(mse)
mape_exponential <- mean(abs(ets_error / valid.ts) * 100, na.rm = TRUE)
cat("Mean Absolute Error (MAE):", mae, "\n")
cat("Mean Squared Error (MSE):", mse, "\n")
cat("Root Mean Squared Error (RMSE):", rmse, "\n")
cat("Mean Absolute Percentage Error (MAPE) for Exponential Smoothing Forecast:", mape_exponential,
    "%\n")

# Plot actuals against the forecast. Axis range and ticks are taken from the
# validation period itself instead of a fixed 2021-2022 span.
x_range <- range(time(valid.ts))
x_ticks <- pretty(x_range)
plot(valid.ts, ylim = c(ylow, yhigh), ylab = "SpotTime in Sec", xlab = "Time Horizon",
     bty = "l", xaxt = "n", xlim = x_range, main = "Actual VS Exponential Smoothing Forecast")
axis(1, at = x_ticks, labels = format(x_ticks))
lines(valid.ts, col = "red", lwd = 2)
lines(forecasted_ts, lwd = 2, col = "blue")
legend("topright", legend = c("Actual", "Forecast"), col = c("red", "blue"), lwd = 2)
# Perform ensemble forecast: fit ARIMA, NNAR and TBATS on the training window,
# forecast the remaining observations, and average the three point forecasts.
train <- window(st.ts, end = c(2022, 12))
h <- length(st.ts) - length(train)
if (h < 1) {
  stop("No observations remain after the training window; nothing to forecast.",
       call. = FALSE)
}
ARIMA <- forecast(auto.arima(train, lambda = 0, biasadj = TRUE), h = h)
NNAR <- forecast(nnetar(train), h = h)
TBATS <- forecast(tbats(train, biasadj = TRUE), h = h)
# Equal-weight average: three models, so divide by three.
COMBINATION <- (ARIMA[["mean"]] + NNAR[["mean"]] + TBATS[["mean"]]) / 3
# Misspelled name kept as an alias for any code that still reads it.
COMBINATON <- COMBINATION

# Compute MAE, MSE and MAPE (in percent) of `predicted` against `actual`.
# Both are time series; the subtraction keeps only their overlapping period.
# Returns a named numeric vector with elements mae, mse and mape.
forecast_errors <- function(actual, predicted) {
  err <- actual - predicted
  c(
    mae = mean(abs(err), na.rm = TRUE),
    mse = mean(err^2, na.rm = TRUE),
    mape = mean(abs(err / actual) * 100, na.rm = TRUE)
  )
}

# Calculate MAE, MSE, and MAPE for each model
model_means <- list(
  ARIMA = ARIMA$mean,
  NNAR = NNAR$mean,
  TBATS = TBATS$mean,
  COMBINATION = COMBINATION
)
model_errors <- lapply(model_means, forecast_errors, actual = st.ts)

# Individual metric variables, one per model and measure.
mae_ARIMA <- model_errors$ARIMA[["mae"]]
mse_ARIMA <- model_errors$ARIMA[["mse"]]
mape_ARIMA <- model_errors$ARIMA[["mape"]]
mae_NNAR <- model_errors$NNAR[["mae"]]
mse_NNAR <- model_errors$NNAR[["mse"]]
mape_NNAR <- model_errors$NNAR[["mape"]]
mae_TBATS <- model_errors$TBATS[["mae"]]
mse_TBATS <- model_errors$TBATS[["mse"]]
mape_TBATS <- model_errors$TBATS[["mape"]]
mae_COMBINATION <- model_errors$COMBINATION[["mae"]]
mse_COMBINATION <- model_errors$COMBINATION[["mse"]]
mape_COMBINATION <- model_errors$COMBINATION[["mape"]]

# Print the MAE, MSE, and MAPE for each model, with a blank line between models.
model_names <- names(model_errors)
for (model_name in model_names) {
  model_metrics <- model_errors[[model_name]]
  is_last <- identical(model_name, model_names[length(model_names)])
  cat(model_name, ":\n", sep = "")
  cat("Mean Absolute Error (MAE):", model_metrics[["mae"]], "\n")
  cat("Mean Squared Error (MSE):", model_metrics[["mse"]], "\n")
  cat("Mean Absolute Percentage Error (MAPE):", model_metrics[["mape"]],
      if (is_last) "%\n" else "%\n\n")
}

# Create a plot of the actual series with every model's point forecast.
# PI = FALSE hides prediction intervals on the forecast objects; the combined
# forecast is a plain time series, so it takes no PI argument.
autoplot(st.ts) +
  autolayer(ARIMA, series = "ARIMA", PI = FALSE) +
  autolayer(NNAR, series = "NNAR", PI = FALSE) +
  autolayer(TBATS, series = "TBATS", PI = FALSE) +
  autolayer(COMBINATION, series = "COMBINATION") +
  xlab("Time Horizon") +
  ylab("Spot Time Duration") +
  ggtitle("Actual Spot Cycles vs Other Model Forecast")
# Download