jeudi 16 août 2018

Loop with if else and next R

I am combining a loop with if else and next in R.

In order to reproduce my problem in its complexity, I cannot provide a minimum example but a big piece of code. The aim is to fill the list df with min, max, 50percentile in the columns rmse_1 and rmse_2.

You need to change the path accordung to your desired path at the positions marked with # !!! change path. If you changed the paths you can run the code:

# create lists
mse_samp <- list("mse_A" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(2.5, 4.6, 7.8, 1.2, 3.9), 
                                                   mse_2 = c(6.7, 8.9, 4.1, 3.5, 2.2)), 
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(22.5, 74.6, 97.8, 21.2, 43.9), 
                                                   mse_2 = c(56.7, 78.9, 14.1, 23.5, 82.2)),
                                "P50" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(1022.5, 3074.6, 9097.8, 1221.2, 4343.9), 
                                                   mse_2 = c(4656.7, 2278.9, 4314.1, 7623.5, 8982.2))),
                 "mse_B" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(122.5, 124.6, 127.8, 121.2, 123.9), 
                                                   mse_2 = c(126.7, 128.9, 124.1, 123.5, 122.2)), 
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(3422.5, 3474.6, 3497.8, 3421.2, 3443.9), 
                                                   mse_2 = c(3456.7, 3478.9, 3414.1, 3423.5, 3482.2)),
                                "P50" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(1022.5, 3074.6, 9097.8, 1221.2, 4343.9), 
                                                   mse_2 = c(4656.7, 2278.9, 4314.1, 7623.5, 8982.2))),
                 "mse_C" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(0.5, 0.6, 0.8, 0.2, 3.9), 
                                                   mse_2 = c(5.7, 1.9, 7.1, 3.5, 9.2)), 
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(722.5, 874.6, 997.8, 121.2, 343.9), 
                                                   mse_2 = c(556.7, 678.9, 314.1, 723.5, 282.2)),
                                "P50" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(11022.5, 13074.6, 19097.8, 11221.2, 14343.9), 
                                                   mse_2 = c(14656.7, 12278.9, 14314.1, 17623.5, 18982.2))))
# !!! change path
save(mse_samp, 
     file="H:\\R\\Forum_data\\dat1.RData")

mse_samp <- list("mse_A" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(2.2, 7.6, 7.8, 1.2, 3.9), 
                                                   mse_2 = c(6.7, 8.9, 7.1, 3.2, 2.2)), 
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(221.5, 741.6, 971.8, 211.2, 431.9), 
                                                   mse_2 = c(56.7, 78.9, 14.1, 23.5, 82.2)),
                                "P70" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(22.2, 77.6, 97.8, 21.2, 73.9), 
                                                   mse_2 = c(26.7, 78.9, 17.1, 23.2, 82.2)),
                                "P80" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(1022.2, 3077.6, 9097.8, 1221.2, 7373.9), 
                                                   mse_2 = c(7626.7, 2278.9, 7317.1, 7623.2, 8982.2))),
                 "mse_B" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(122.2, 127.6, 127.8, 121.2, 123.9), 
                                                   mse_2 = c(126.7, 128.9, 127.1, 123.2, 122.2)),
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(221.5, 741.6, 971.8, 211.2, 431.9), 
                                                   mse_2 = c(56.7, 78.9, 14.1, 23.5, 82.2)),
                                "P70" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(3722.2, 3777.6, 3797.8, 3721.2, 3773.9), 
                                                   mse_2 = c(3726.7, 3778.9, 3717.1, 3723.2, 3782.2)),
                                "P80" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(1022.2, 3077.6, 9097.8, 1221.2, 7373.9), 
                                                   mse_2 = c(7626.7, 2278.9, 7317.1, 7623.2, 8982.2))),
                 "mse_C" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(0.2, 0.6, 0.8, 0.2, 3.9), 
                                                   mse_2 = c(2.7, 1.9, 7.1, 3.2, 9.2)),
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(221.5, 741.6, 971.8, 211.2, 431.9), 
                                                   mse_2 = c(56.7, 78.9, 14.1, 23.5, 82.2)),
                                "P70" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(722.2, 877.6, 997.8, 121.2, 373.9), 
                                                   mse_2 = c(226.7, 678.9, 317.1, 723.2, 282.2)),
                                "P80" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(11022.2, 13077.6, 19097.8, 11221.2, 17373.9), 
                                                   mse_2 = c(17626.7, 12278.9, 17317.1, 17623.2, 18982.2))))
# !!! change path
save(mse_samp, 
     file="H:\\R\\Forum_data\\dat2.RData")

mse_samp <- list("mse_A" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(0.2, 0.6, 0.8, 0.2, 3.9), 
                                                   mse_2 = c(2.7, 1.9, 7.1, 3.2, 9.2)),
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(2.2, 9.6, 9.1, 1.2, 3.9), 
                                                   mse_2 = c(6.9, 1.9, 9.1, 3.2, 2.2)), 
                                "P90" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(22.2, 99.6, 99.1, 21.2, 93.9), 
                                                   mse_2 = c(26.9, 91.9, 19.1, 23.2, 12.2)),
                                "P100" = data.frame(number = seq(100,1000,200), 
                                                    mse_1 = c(1022.2, 3099.6, 9099.1, 1221.2, 9393.9), 
                                                    mse_2 = c(9626.9, 2291.9, 9319.1, 9623.2, 1912.2))),
                 "mse_B" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(0.2, 0.6, 0.8, 0.2, 3.9), 
                                                   mse_2 = c(2.7, 1.9, 7.1, 3.2, 9.2)),
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(122.2, 129.6, 129.1, 121.2, 123.9), 
                                                   mse_2 = c(126.9, 121.9, 129.1, 123.2, 122.2)), 
                                "P90" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(3922.2, 3999.6, 3999.1, 3921.2, 3993.9), 
                                                   mse_2 = c(3926.9, 3991.9, 3919.1, 3923.2, 3912.2)),
                                "P100" = data.frame(number = seq(100,1000,200), 
                                                    mse_1 = c(1022.2, 3099.6, 9099.1, 1221.2, 9393.9), 
                                                    mse_2 = c(9626.9, 2291.9, 9319.1, 9623.2, 1912.2))),
                 "mse_C" = list("P10" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(0.2, 0.6, 0.8, 0.2, 3.9), 
                                                   mse_2 = c(2.7, 1.9, 7.1, 3.2, 9.2)),
                                "P30" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(0.2, 0.6, 0.1, 0.2, 3.9), 
                                                   mse_2 = c(2.9, 1.9, 9.1, 3.2, 9.2)), 
                                "P90" = data.frame(number = seq(100,1000,200), 
                                                   mse_1 = c(922.2, 199.6, 999.1, 121.2, 393.9), 
                                                   mse_2 = c(226.9, 691.9, 319.1, 923.2, 212.2)),
                                "P100" = data.frame(number = seq(100,1000,200), 
                                                    mse_1 = c(11022.2, 13099.6, 19099.1, 11221.2, 19393.9), 
                                                    mse_2 = c(19626.9, 12291.9, 19319.1, 19623.2, 11912.2))))
# !!! change path
save(mse_samp, 
     file="H:\\R\\Forum_data\\dat3.RData")

# create table for min max for different perc and runs for each paramter (loop)
n_measure <- 3 # number of different measures
npr1 <- 3 # number of different percs run1
npr2 <- 4 # number of different percs run2
npr3 <- 4 # number of different percs run3

targets <- c("A",  "B", "C")

for (i in 1:length(targets)) {
  df <- data.frame(run = c(rep("run1", n_measure * npr1),
                           rep("run2", n_measure * npr2),
                           rep("run3", n_measure * npr3)),

                   perc_train = c(rep(c(0.1, 0.3, 0.5), times = 1, each = n_measure), # percs run 1
                                  rep(c(0.1, 0.3, 0.7, 0.8), times = 1, each = n_measure), # percs run 2
                                  rep(c(0.1, 0.3, 0.9, 1), times = 1, each = n_measure)), # perc run 3

                   measure = c(rep(c("min", "max", "50percentile"),
                                   times = npr1 + npr2 + npr3, each = 1)),

                   rmse_1 = rep(NA,  n_measure * (npr1 + npr2 + npr3)),
                   rmse_2 = rep(NA,  n_measure * (npr1 + npr2 + npr3))
  )

  assign(paste0('df_', targets[i]), df)

}

df <- list("A" = df_A,  "B" = df_B, "C" = df_C)

# convert column which are factors to characters
for (i in 1:length(targets)) {

  df[[i]][sapply(df[[i]], is.factor)] <- lapply(df[[i]][sapply(df[[i]], is.factor)], 
                                                as.character)
}

rm(list = c("df_A", "df_B", "df_C"))
# !!! change path
path <- c("H:\\R\\Forum_data\\dat1.RData", # run1
# !!! change path          
          "H:\\R\\Forum_data\\dat2.RData", # run2
# !!! change path
          "H:\\R\\Forum_data\\dat3.RData") # run3

percs_names <- c("P10", "P30", "P50", "P70", "P80", "P90", "1")
percs <- c(0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 1)
targets <- c("A", "B", "C")
run_name <- c("run1", "run2", "run3")
measure_name <- c("min", "max", "50percentile")

# read in data
for (i in 1:length(path)) {
  load(path[i])

  dat <- mse_samp


  for (j in 1:length(targets)) {
    for (k in 1:length(percs_names)) {
      # if statement
      if(percs_names[k] == names(dat[[j]][k])){

        dat1 <- dat[[paste0("mse_", targets[j])]][k][[1]]
        rmse_min_1 <- sqrt(min(dat1$mse_1))
        rmse_min_2 <- sqrt(min(dat1$mse_2))
        rmse_max_1 <- sqrt(max(dat1$mse_1))
        rmse_max_2 <- sqrt(max(dat1$mse_2))
        percentile_50_1 <- quantile(sqrt(dat1$mse_1), probs = 0.5)
        percentile_50_2 <- quantile(sqrt(dat1$mse_2), probs = 0.5)

        for (m in 1:length(measure_name)) {
          a <- which(df[[targets[j]]]$run == run_name[i] & 
                       df[[targets[j]]]$measure == measure_name[m] & 
                       df[[targets[j]]]$perc_train == percs[k] &
                       is.na(df[[targets[j]]]$rmse_1)
          )
        }

        df[[targets[j]]][a, which(names(df[[targets[j]]]) == "rmse_1")] <- rmse_min_1
      }
      else { next }
    }
  }
}

After running the code, the following error message occurs: Error in if (percs_names[k] == names(dat[[j]][k])) { : missing value where TRUE/FALSE needed

I guess the problem is probably within the if elsestatement. How can I run the code without the error?

Currently only rows for run1 and 50percentile are filled in columns rmse_1 and rmse_2. How I can fill rows min and max for all runs as well? In the end there should be no NA left.

Aucun commentaire:

Enregistrer un commentaire