hw5_Upadhyay_Ambuj

.pdf

School

University of California, Irvine *

*We aren’t endorsed by this school

Course

PRED

Subject

Industrial Engineering

Date

Apr 3, 2024

Type

pdf

Pages

23

Uploaded by SuperHumanLoris4024

Report
hw5_Upadhyay_Ambuj.R ambujupadhyay 2024-02-19 # Load necessary libraries library (fpp3) # This includes packages like tsibble, fable, feasts for handling time series ## -- Attaching packages ---------------------------------------------- fpp3 0.5 -- ## v tibble 3.2.1 v tsibble 1.1.4 ## v dplyr 1.1.4 v tsibbledata 0.4.1 ## v tidyr 1.3.0 v feasts 0.3.1 ## v lubridate 1.9.3 v fable 0.3.3 ## v ggplot2 3.4.4 v fabletools 0.3.4 ## -- Conflicts ------------------------------------------------- fpp3_conflicts -- ## x lubridate::date() masks base::date() ## x dplyr::filter() masks stats::filter() ## x tsibble::intersect() masks base::intersect() ## x tsibble::interval() masks lubridate::interval() ## x dplyr::lag() masks stats::lag() ## x tsibble::setdiff() masks base::setdiff() ## x tsibble::union() masks base::union() library (readr) # For reading CSV files library (dplyr) library (ggplot2) library (tsibble) #Question 1 # Load the bike sharing data dat <- read_csv ( "~/Downloads/academics/winter quarter/Predictive Analytics/homework 5/hw5_bike_share_day ## Rows: 731 Columns: 14 ## -- Column specification -------------------------------------------------------- ## Delimiter: "," ## chr (1): dteday ## dbl (13): dayID, season, yr, mnth, holiday, weekday, workday, weathersit, te... ## ## i Use ` spec() ` to retrieve the full column specification for this data. ## i Specify the column types or set ` show_col_types = FALSE ` to quiet this message. # Convert the date column to a Date object and create a tsibble dat $ dteday <- as.Date (dat $ dteday, format = "%m/%d/%Y" ) cnts <- ts (dat[, 14 ], frequency = 7 ) cntts <- as_tsibble (cnts) cntts <- cntts %>% mutate ( index = as.Date (dat $ dteday, format = "%Y-%m-%d" )) names (cntts)[ 2 ] <- "count" ; str (cntts) 1
## tbl_ts [731 x 2] (S3: tbl_ts/tbl_df/tbl/data.frame) ## $ index: Date[1:731], format: "2011-01-01" "2011-01-02" ... ## $ count: num [1:731] 985 801 1349 1562 1600 ... ## - attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame) ## ..$ .rows: list<int> [1:1] ## .. .. $ : int [1:731] 1 2 3 4 5 6 7 8 9 10 ... ## .. .. @ ptype: int(0) ## - attr(*, "index")= chr "index" ## ..- attr(*, "ordered")= logi TRUE ## - attr(*, "index2")= chr "index" ## - attr(*, "interval")= interval [1:1] 1D ## ..@ .regular: logi TRUE str (cntts) ## tbl_ts [731 x 2] (S3: tbl_ts/tbl_df/tbl/data.frame) ## $ index: Date[1:731], format: "2011-01-01" "2011-01-02" ... ## $ count: num [1:731] 985 801 1349 1562 1600 ... ## - attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame) ## ..$ .rows: list<int> [1:1] ## .. .. $ : int [1:731] 1 2 3 4 5 6 7 8 9 10 ... ## .. .. @ ptype: int(0) ## - attr(*, "index")= chr "index" ## ..- attr(*, "ordered")= logi TRUE ## - attr(*, "index2")= chr "index" ## - attr(*, "interval")= interval [1:1] 1D ## ..@ .regular: logi TRUE # Convert to a tsibble cntts <- as_tsibble (dat, index = dteday) %>% select (dteday, cnt) %>% rename ( count = cnt) # Now, plot using autoplot autoplot (cntts, count) 2
0 2500 5000 7500 2011-01 2011-07 2012-01 2012-07 2013-01 dteday [1D] count #Based on the plot. Here are some observations that could be indicative of #underlying signals: #1. Seasonality: There appears to be a recurring pattern that could be related #to seasons, with peaks and troughs occurring at regular intervals. This is #common in bike rental data where usage may increase during warmer months and #decrease during colder months. #2. Trend: There may be an underlying trend in the data, such as an overall #increase or decrease in bike rentals over time. However, the trend is not #immediately apparent from the plot and would require further analysis to confirm. #3. Cyclical Changes: Beyond seasonality, there may be cyclical changes that are #not of fixed frequency, which could be related to economic or other broad #social factors. #4. Irregularities or Outliers: There are several spikes and sharp drops in the #rental counts that could be outliers or could indicate one-off events affecting #bike rentals, such as holidays, strikes, or special events in the city. #5. Noise: There is some degree of irregular fluctuation that does not appear to #be systematic or recurring. This could be considered as noise in the data. # Fit SES model with alpha = 0.25 fit_bike_SES_025 <- model (cntts, ETS (count ~ error ( "A" ) + trend ( "N" , alpha = 0.25 ) + season ( "N" ))) report (fit_bike_SES_025) ## Series: count ## Model: ETS(A,N,N) ## Smoothing parameters: 3
## alpha = 0.25 ## ## Initial states: ## l[0] ## 1188.925 ## ## sigma^2: 933694.1 ## ## AIC AICc BIC ## 14871.50 14871.52 14880.69 accuracy (fit_bike_SES_025) ## # A tibble: 1 x 10 ## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 "ETS(count ~ error(\"A\~ Trai~ 5.18 965. 689. -49.9 64.6 0.738 0.720 0.231 # Fit SES model with alpha = 0.75 fit_bike_SES_075 <- model (cntts, ETS (count ~ error ( "A" ) + trend ( "N" , alpha = 0.25 ) + season ( "N" ))) report (fit_bike_SES_075) ## Series: count ## Model: ETS(A,N,N) ## Smoothing parameters: ## alpha = 0.25 ## ## Initial states: ## l[0] ## 1188.925 ## ## sigma^2: 933694.1 ## ## AIC AICc BIC ## 14871.50 14871.52 14880.69 accuracy (fit_bike_SES_075) ## # A tibble: 1 x 10 ## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 "ETS(count ~ error(\"A\~ Trai~ 5.18 965. 689. -49.9 64.6 0.738 0.720 0.231 # Fit SES model with optimized parameters fit_bike_SES_opt <- model (cntts, ETS (count ~ error ( "A" ) + trend ( "N" ) + season ( "N" ))) report (fit_bike_SES_opt) ## Series: count ## Model: ETS(A,N,N) ## Smoothing parameters: ## alpha = 0.2834019 ## ## Initial states: ## l[0] ## 1162.607 4
## ## sigma^2: 932845.1 ## ## AIC AICc BIC ## 14872.84 14872.87 14886.62 accuracy (fit_bike_SES_opt) ## # A tibble: 1 x 10 ## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 "ETS(count ~ error(\"A\~ Trai~ 4.63 965. 689. -49.2 64.0 0.738 0.719 0.207 # Fit a naive model fit_naive <- model (cntts, NAIVE (count)) report (fit_naive) ## Series: count ## Model: NAIVE ## ## sigma^2: 1134768.6797 accuracy (fit_naive) ## # A tibble: 1 x 10 ## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 NAIVE(count) Training 2.39 1065. 730. -33.5 50.4 0.782 0.794 -0.291 #Based on the results, we see that the fit_bike_SES_025, fit_bike_SES_075, #and fit_bike_SES_opt models have very similar accuracy metrics. #The fit_naive model has the worst RMSE and MAE values, which means it has a #poorer fit in terms of these metrics compared to the other models. However, #it does have a smaller MPE and MAPE, which suggests it might be better in #terms of percentage errors. #The fit_bike_SES_opt model has the lowest RMSE and MAE of the exponential #smoothing models, indicating that allowing the model to optimize the parameters #provides a slightly better fit in terms of these metrics. However, the #differences are not large. #In conclusion, based on the metrics, the fit_bike_SES_opt model seems #to be the best fit overall due to having the lowest RMSE and MAE, although the #improvement is marginal. The slight improvement could be due to the model being #able to find the best smoothing parameter (alpha) through optimization. #The fit_naive model seems to perform worse in terms of RMSE and MAE but has a #better percentage error, which might make it preferable depending on the #specific context or if the forecast error distribution is skewed. #Question 2 # Assuming you have the ` cntts ` tsibble object from the previous question # Load the fable package library (fable) # Fit Holt ' s additive trend model fit_holt_add <- model (cntts, ETS (count ~ error ( "A" ) + trend ( "A" ) + season ( "N" ))) # Fit Holt ' s damped additive trend model 5
fit_holt_damped <- model (cntts, ETS (count ~ error ( "A" ) + trend ( "Ad" ) + season ( "N" ))) # Summarize the models report (fit_holt_add) ## Series: count ## Model: ETS(A,A,N) ## Smoothing parameters: ## alpha = 0.2851936 ## beta = 0.0001000645 ## ## Initial states: ## l[0] b[0] ## 1226.043 6.177343 ## ## sigma^2: 935936.9 ## ## AIC AICc BIC ## 14877.25 14877.33 14900.22 report (fit_holt_damped) ## Series: count ## Model: ETS(A,Ad,N) ## Smoothing parameters: ## alpha = 0.2832013 ## beta = 0.0001001071 ## phi = 0.8990425 ## ## Initial states: ## l[0] b[0] ## 1218.343 6.761478 ## ## sigma^2: 936822.9 ## ## AIC AICc BIC ## 14878.93 14879.05 14906.50 # Compare accuracy of the additive model accuracy_fit_holt_add <- accuracy (fit_holt_add) accuracy_fit_holt_add ## # A tibble: 1 x 10 ## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 "ETS(count ~ error(\"A\~ Trai~ -18.4 965. 684. -50.1 64.1 0.733 0.719 0.206 # Compare accuracy of the damped additive model accuracy_fit_holt_damped <- accuracy (fit_holt_damped) accuracy_fit_holt_damped ## # A tibble: 1 x 10 ## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 "ETS(count ~ error(\"A\~ Trai~ 4.03 965. 689. -49.3 64.0 0.738 0.719 0.207 6
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help