mardi 30 juin 2020

How to assign TRUE / FALSE to dates (holidays) as boolean variable in r?

I have a df of 10 years of hourly measurements and I want to add a new variable containing german holidays.

In the end I want a new column containing TRUE/FALSE for the holidays.

So far I've tried the 'holiday' function from the timeDate package, different ifelse functions and loops. The data in a MRE format (I hope):

#mydata
df <– structure(list(value = c(359.9, 60.3, 13.7, 25.1, 12.4, 27.1, 
80.2, 24.3, 30.1, 28.8, 45.1, 58.2, 65.6, 55.6, 69.9, 13.5, 20.1, 
41.3, 9.9, 12.1), urban = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE), traffic = c(FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), date = structure(c(14245, 
14246, 14247, 14248, 14249, 14250, 14251, 14252, 14253, 14254, 
14255, 14256, 14257, 14258, 14259, 14260, 14261, 14262, 14263, 
14264), class = "Date"), season = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("winter", "spring", "summer", "fall"), class = "factor")), row.names = c(NA, 
20L), class = "data.frame", .Names = c("value", "urban", "traffic", 
"date", "season"))

#holidays (some vary over years)
holidays <- data.frame(
  K.Fr = c("2009-04-10", "2010-04-02", "2011-04-22", "2012-04-06", "2013-03-29", "2014-04-18", "2015-04-03", "2016-03-25", "2017-04-14", "2018-03-30"),
  OsterMo = c("2009-04-13", "2010-04-05", "2011-04-25", "2012-04-09", "2013-04-01", "2014-04-21", "2015-04-06", "2016-03-28", "2017-04-17", "2018-04-02"),
  PfingstMo = c("2009-06-01", "2010-05-24", "2011-06-13", "2012-05-28", "2013-05-20", "2014-06-09", "2015-05-25", "2016-05-16", "2017-06-05", "2018-05-21"),
  W1 = c("2009-12-25", "2010-12-25", "2011-12-25", "2012-12-25", "2013-12-25", "2014-12-25", "2015-12-25", "2016-12-25", "2017-12-25", "2018-12-25"),
  W2 = c("2009-12-26", "2010-12-26", "2011-12-26", "2012-12-26", "2013-12-26", "2014-12-26", "2015-12-26", "2016-12-26", "2017-12-26", "2018-12-26"),
  ErsterMai = c("2009-05-01", "2010-05-01", "2011-05-01", "2012-05-01", "2013-05-01", "2014-05-01", "2015-05-01", "2016-05-01", "2017-05-01", "2018-05-01"),
  Neujahr = c("2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01", "2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01", "2018-01-01"),
  F1 = c("2009-05-21", "2009-10-03", "2010-05-13",  "2010-10-03", "2011-06-02",  "2011-10-03", "2012-05-17",  "2012-10-03", "2013-05-09", "2013-10-03"),
  F2 = c("2014-05-29",  "2014-10-03", "2015-05-14", "2015-10-03", "2016-05-05",  "2016-10-03", "2017-05-25",  "2017-10-03", "2018-05-10","2018-10-03")
)

holidays <- gather(holidays, key = "holiday", value = "date")
holidays <- holidays[,2]
holidays <- as.POSIXct(
  holidays,
  format = "%Y-%m-%d"
)
holidays <- as.Date(holidays)

And here is what I tried yet:

df <- dplyr::mutate(
  df,
  ifelse(df$date == holidays[1],
         T,
         F)
)

df <- for (i in seq(nrow(df$date))){
  ifelse(df$date[i] == holidays,
         T,
         F)
}

x <- 1
repeat {
  df <- for (i in seq(nrow(df$date))){
  ifelse(df$date == holidays[x],
         T,
         F)
  } 
  x = x+1
  if (x == nrow(df)){
    break
  }
}

Aucun commentaire:

Enregistrer un commentaire