I have a df of 10 years of hourly measurements and I want to add a new variable containing german holidays.
In the end I want a new column containing TRUE/FALSE for the holidays.
So far I've tried the 'holiday' function from the timeDate package, different ifelse functions and loops. The data in a MRE format (I hope):
#mydata
df <– structure(list(value = c(359.9, 60.3, 13.7, 25.1, 12.4, 27.1,
80.2, 24.3, 30.1, 28.8, 45.1, 58.2, 65.6, 55.6, 69.9, 13.5, 20.1,
41.3, 9.9, 12.1), urban = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE), traffic = c(FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), date = structure(c(14245,
14246, 14247, 14248, 14249, 14250, 14251, 14252, 14253, 14254,
14255, 14256, 14257, 14258, 14259, 14260, 14261, 14262, 14263,
14264), class = "Date"), season = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("winter", "spring", "summer", "fall"), class = "factor")), row.names = c(NA,
20L), class = "data.frame", .Names = c("value", "urban", "traffic",
"date", "season"))
#holidays (some vary over years)
holidays <- data.frame(
K.Fr = c("2009-04-10", "2010-04-02", "2011-04-22", "2012-04-06", "2013-03-29", "2014-04-18", "2015-04-03", "2016-03-25", "2017-04-14", "2018-03-30"),
OsterMo = c("2009-04-13", "2010-04-05", "2011-04-25", "2012-04-09", "2013-04-01", "2014-04-21", "2015-04-06", "2016-03-28", "2017-04-17", "2018-04-02"),
PfingstMo = c("2009-06-01", "2010-05-24", "2011-06-13", "2012-05-28", "2013-05-20", "2014-06-09", "2015-05-25", "2016-05-16", "2017-06-05", "2018-05-21"),
W1 = c("2009-12-25", "2010-12-25", "2011-12-25", "2012-12-25", "2013-12-25", "2014-12-25", "2015-12-25", "2016-12-25", "2017-12-25", "2018-12-25"),
W2 = c("2009-12-26", "2010-12-26", "2011-12-26", "2012-12-26", "2013-12-26", "2014-12-26", "2015-12-26", "2016-12-26", "2017-12-26", "2018-12-26"),
ErsterMai = c("2009-05-01", "2010-05-01", "2011-05-01", "2012-05-01", "2013-05-01", "2014-05-01", "2015-05-01", "2016-05-01", "2017-05-01", "2018-05-01"),
Neujahr = c("2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01", "2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01", "2018-01-01"),
F1 = c("2009-05-21", "2009-10-03", "2010-05-13", "2010-10-03", "2011-06-02", "2011-10-03", "2012-05-17", "2012-10-03", "2013-05-09", "2013-10-03"),
F2 = c("2014-05-29", "2014-10-03", "2015-05-14", "2015-10-03", "2016-05-05", "2016-10-03", "2017-05-25", "2017-10-03", "2018-05-10","2018-10-03")
)
holidays <- gather(holidays, key = "holiday", value = "date")
holidays <- holidays[,2]
holidays <- as.POSIXct(
holidays,
format = "%Y-%m-%d"
)
holidays <- as.Date(holidays)
And here is what I tried yet:
df <- dplyr::mutate(
df,
ifelse(df$date == holidays[1],
T,
F)
)
df <- for (i in seq(nrow(df$date))){
ifelse(df$date[i] == holidays,
T,
F)
}
x <- 1
repeat {
df <- for (i in seq(nrow(df$date))){
ifelse(df$date == holidays[x],
T,
F)
}
x = x+1
if (x == nrow(df)){
break
}
}
Aucun commentaire:
Enregistrer un commentaire