hw5_Upadhyay_Ambuj
.pdf
keyboard_arrow_up
School
University of California, Irvine *
*We aren’t endorsed by this school
Course
PRED
Subject
Industrial Engineering
Date
Apr 3, 2024
Type
Pages
23
Uploaded by SuperHumanLoris4024
hw5_Upadhyay_Ambuj.R
ambujupadhyay
2024-02-19
# Load necessary libraries
library
(fpp3)
# This includes packages like tsibble, fable, feasts for handling time series
## -- Attaching packages ---------------------------------------------- fpp3 0.5 --
## v tibble
3.2.1
v tsibble
1.1.4
## v dplyr
1.1.4
v tsibbledata 0.4.1
## v tidyr
1.3.0
v feasts
0.3.1
## v lubridate
1.9.3
v fable
0.3.3
## v ggplot2
3.4.4
v fabletools
0.3.4
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()
masks base::date()
## x dplyr::filter()
masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval()
masks lubridate::interval()
## x dplyr::lag()
masks stats::lag()
## x tsibble::setdiff()
masks base::setdiff()
## x tsibble::union()
masks base::union()
library
(readr)
# For reading CSV files
library
(dplyr)
library
(ggplot2)
library
(tsibble)
#Question 1
# Load the bike sharing data
dat
<-
read_csv
(
"~/Downloads/academics/winter quarter/Predictive Analytics/homework 5/hw5_bike_share_day
## Rows: 731 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr
(1): dteday
## dbl (13): dayID, season, yr, mnth, holiday, weekday, workday, weathersit, te...
##
## i Use
`
spec()
`
to retrieve the full column specification for this data.
## i Specify the column types or set
`
show_col_types = FALSE
`
to quiet this message.
# Convert the date column to a Date object and create a tsibble
dat
$
dteday
<-
as.Date
(dat
$
dteday,
format =
"%m/%d/%Y"
)
cnts
<-
ts
(dat[,
14
],
frequency =
7
)
cntts
<-
as_tsibble
(cnts)
cntts
<-
cntts
%>%
mutate
(
index =
as.Date
(dat
$
dteday,
format =
"%Y-%m-%d"
))
names
(cntts)[
2
]
<-
"count"
;
str
(cntts)
1
## tbl_ts [731 x 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
##
$ index: Date[1:731], format: "2011-01-01" "2011-01-02" ...
##
$ count: num [1:731] 985 801 1349 1562 1600 ...
##
- attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame)
##
..$ .rows: list<int> [1:1]
##
.. ..
$ : int [1:731] 1 2 3 4 5 6 7 8 9 10 ...
##
.. ..
@ ptype: int(0)
##
- attr(*, "index")= chr "index"
##
..- attr(*, "ordered")= logi TRUE
##
- attr(*, "index2")= chr "index"
##
- attr(*, "interval")= interval [1:1] 1D
##
..@ .regular: logi TRUE
str
(cntts)
## tbl_ts [731 x 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
##
$ index: Date[1:731], format: "2011-01-01" "2011-01-02" ...
##
$ count: num [1:731] 985 801 1349 1562 1600 ...
##
- attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame)
##
..$ .rows: list<int> [1:1]
##
.. ..
$ : int [1:731] 1 2 3 4 5 6 7 8 9 10 ...
##
.. ..
@ ptype: int(0)
##
- attr(*, "index")= chr "index"
##
..- attr(*, "ordered")= logi TRUE
##
- attr(*, "index2")= chr "index"
##
- attr(*, "interval")= interval [1:1] 1D
##
..@ .regular: logi TRUE
# Convert to a tsibble
cntts
<-
as_tsibble
(dat,
index =
dteday)
%>%
select
(dteday, cnt)
%>%
rename
(
count =
cnt)
# Now, plot using autoplot
autoplot
(cntts, count)
2
0
2500
5000
7500
2011-01
2011-07
2012-01
2012-07
2013-01
dteday [1D]
count
#Based on the plot. Here are some observations that could be indicative of
#underlying signals:
#1. Seasonality: There appears to be a recurring pattern that could be related
#to seasons, with peaks and troughs occurring at regular intervals. This is
#common in bike rental data where usage may increase during warmer months and
#decrease during colder months.
#2. Trend: There may be an underlying trend in the data, such as an overall
#increase or decrease in bike rentals over time. However, the trend is not
#immediately apparent from the plot and would require further analysis to confirm.
#3. Cyclical Changes: Beyond seasonality, there may be cyclical changes that are
#not of fixed frequency, which could be related to economic or other broad
#social factors.
#4. Irregularities or Outliers: There are several spikes and sharp drops in the
#rental counts that could be outliers or could indicate one-off events affecting
#bike rentals, such as holidays, strikes, or special events in the city.
#5. Noise: There is some degree of irregular fluctuation that does not appear to
#be systematic or recurring. This could be considered as noise in the data.
# Fit SES model with alpha = 0.25
fit_bike_SES_025
<-
model
(cntts,
ETS
(count
~
error
(
"A"
)
+
trend
(
"N"
,
alpha =
0.25
)
+
season
(
"N"
)))
report
(fit_bike_SES_025)
## Series: count
## Model: ETS(A,N,N)
##
Smoothing parameters:
3
##
alpha = 0.25
##
##
Initial states:
##
l[0]
##
1188.925
##
##
sigma^2:
933694.1
##
##
AIC
AICc
BIC
## 14871.50 14871.52 14880.69
accuracy
(fit_bike_SES_025)
## # A tibble: 1 x 10
##
.model
.type
ME
RMSE
MAE
MPE
MAPE
MASE RMSSE
ACF1
##
<chr>
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(count ~ error(\"A\~ Trai~
5.18
965.
689. -49.9
64.6 0.738 0.720 0.231
# Fit SES model with alpha = 0.75
fit_bike_SES_075
<-
model
(cntts,
ETS
(count
~
error
(
"A"
)
+
trend
(
"N"
,
alpha =
0.25
)
+
season
(
"N"
)))
report
(fit_bike_SES_075)
## Series: count
## Model: ETS(A,N,N)
##
Smoothing parameters:
##
alpha = 0.25
##
##
Initial states:
##
l[0]
##
1188.925
##
##
sigma^2:
933694.1
##
##
AIC
AICc
BIC
## 14871.50 14871.52 14880.69
accuracy
(fit_bike_SES_075)
## # A tibble: 1 x 10
##
.model
.type
ME
RMSE
MAE
MPE
MAPE
MASE RMSSE
ACF1
##
<chr>
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(count ~ error(\"A\~ Trai~
5.18
965.
689. -49.9
64.6 0.738 0.720 0.231
# Fit SES model with optimized parameters
fit_bike_SES_opt
<-
model
(cntts,
ETS
(count
~
error
(
"A"
)
+
trend
(
"N"
)
+
season
(
"N"
)))
report
(fit_bike_SES_opt)
## Series: count
## Model: ETS(A,N,N)
##
Smoothing parameters:
##
alpha = 0.2834019
##
##
Initial states:
##
l[0]
##
1162.607
4
##
##
sigma^2:
932845.1
##
##
AIC
AICc
BIC
## 14872.84 14872.87 14886.62
accuracy
(fit_bike_SES_opt)
## # A tibble: 1 x 10
##
.model
.type
ME
RMSE
MAE
MPE
MAPE
MASE RMSSE
ACF1
##
<chr>
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(count ~ error(\"A\~ Trai~
4.63
965.
689. -49.2
64.0 0.738 0.719 0.207
# Fit a naive model
fit_naive
<-
model
(cntts,
NAIVE
(count))
report
(fit_naive)
## Series: count
## Model: NAIVE
##
## sigma^2: 1134768.6797
accuracy
(fit_naive)
## # A tibble: 1 x 10
##
.model
.type
ME
RMSE
MAE
MPE
MAPE
MASE RMSSE
ACF1
##
<chr>
<chr>
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
<dbl>
## 1 NAIVE(count) Training
2.39 1065.
730. -33.5
50.4 0.782 0.794 -0.291
#Based on the results, we see that the fit_bike_SES_025, fit_bike_SES_075,
#and fit_bike_SES_opt models have very similar accuracy metrics.
#The fit_naive model has the worst RMSE and MAE values, which means it has a
#poorer fit in terms of these metrics compared to the other models. However,
#it does have a smaller MPE and MAPE, which suggests it might be better in
#terms of percentage errors.
#The fit_bike_SES_opt model has the lowest RMSE and MAE of the exponential
#smoothing models, indicating that allowing the model to optimize the parameters
#provides a slightly better fit in terms of these metrics. However, the
#differences are not large.
#In conclusion, based on the metrics, the fit_bike_SES_opt model seems
#to be the best fit overall due to having the lowest RMSE and MAE, although the
#improvement is marginal. The slight improvement could be due to the model being
#able to find the best smoothing parameter (alpha) through optimization.
#The fit_naive model seems to perform worse in terms of RMSE and MAE but has a
#better percentage error, which might make it preferable depending on the
#specific context or if the forecast error distribution is skewed.
#Question 2
# Assuming you have the
`
cntts
`
tsibble object from the previous question
# Load the fable package
library
(fable)
# Fit Holt
'
s additive trend model
fit_holt_add
<-
model
(cntts,
ETS
(count
~
error
(
"A"
)
+
trend
(
"A"
)
+
season
(
"N"
)))
# Fit Holt
'
s damped additive trend model
5
fit_holt_damped
<-
model
(cntts,
ETS
(count
~
error
(
"A"
)
+
trend
(
"Ad"
)
+
season
(
"N"
)))
# Summarize the models
report
(fit_holt_add)
## Series: count
## Model: ETS(A,A,N)
##
Smoothing parameters:
##
alpha = 0.2851936
##
beta
= 0.0001000645
##
##
Initial states:
##
l[0]
b[0]
##
1226.043 6.177343
##
##
sigma^2:
935936.9
##
##
AIC
AICc
BIC
## 14877.25 14877.33 14900.22
report
(fit_holt_damped)
## Series: count
## Model: ETS(A,Ad,N)
##
Smoothing parameters:
##
alpha = 0.2832013
##
beta
= 0.0001001071
##
phi
= 0.8990425
##
##
Initial states:
##
l[0]
b[0]
##
1218.343 6.761478
##
##
sigma^2:
936822.9
##
##
AIC
AICc
BIC
## 14878.93 14879.05 14906.50
# Compare accuracy of the additive model
accuracy_fit_holt_add
<-
accuracy
(fit_holt_add)
accuracy_fit_holt_add
## # A tibble: 1 x 10
##
.model
.type
ME
RMSE
MAE
MPE
MAPE
MASE RMSSE
ACF1
##
<chr>
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(count ~ error(\"A\~ Trai~ -18.4
965.
684. -50.1
64.1 0.733 0.719 0.206
# Compare accuracy of the damped additive model
accuracy_fit_holt_damped
<-
accuracy
(fit_holt_damped)
accuracy_fit_holt_damped
## # A tibble: 1 x 10
##
.model
.type
ME
RMSE
MAE
MPE
MAPE
MASE RMSSE
ACF1
##
<chr>
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(count ~ error(\"A\~ Trai~
4.03
965.
689. -49.3
64.0 0.738 0.719 0.207
6
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help