mercredi 17 février 2021

R: Error in If, missing value where TRUE/FALSE needed. Only get it in the non reproducible code

made the following reproducible code which checks if a condition is true and then adds a value to a vector:

days= runif(2000,min=1,max=1173)
mbuckets = as.data.frame(c(1,2,3,4,5,6,7,8,15,22,29,59,89,119,137,168,209,229,259,
             299,321,351,389,412,443,479,502,533,569,594,624,659,686,
             716,749,777,808,839,867,898,929,959,1019,1020,1051,1081,
             1112,1142,1173))
bucket = rep(0, length(days))
cvar = 0
for (i in days) {
  cvar = cvar+1
  for (j in 1:length(mbuckets[,1])) {
    if(i<=mbuckets[j+1,1] & i>mbuckets[j,1]) {
      bucket[cvar] = mbuckets[j+1,1]
    }
    print(i<=mbuckets[j+1,1] & i>mbuckets[j,1])
    print(i)
    print(j)
  }
}
class(day)

In this case, I obtain what I was looking for, which is a bucket vector, with length equal to days:

> bucket
   [1] 1081  808  259  624  209  898  389  659 1112 1112  321  569  569 1112  898  479  168...

And it seems to be set right, since the conditions of the loop are accomplished:

> sum(days<bucket)
[1] 2000

The problem comes when instead of using random numbers to set the variable days, I use my original data with 26835 entries:

> head(days)
[1] 319 319 319 319 319 319
> tail(days)
[1] 227 319 227  13  13 308
> length(days)
[1] 26835
> class(days)
[1] "numeric"

The variable days is defined from a personal CSV in the following code:

library(lubridate)

positions = read.csv("Positions_ABO&AVB_20210215_R_file.csv", header = TRUE, sep = ";")
positions$DEL.DATEEND = mdy(positions$DEL.DATEEND)
positions$DEL.DATEEND = as.Date(positions$DEL.DATEEND)
positions$DEL.DATESTART = mdy(positions$DEL.DATESTART)
positions$DEL.DATESTART = as.Date(positions$DEL.DATESTART)
positions$days = positions$DEL.DATEEND - as.Date(mdy("02/15/2021"))
write.csv2("filewithdates.csv")
##As of this point, the code is the same
mbuckets = as.data.frame(c(1,2,3,4,5,6,7,8,15,22,29,59,89,119,137,168,209,229,259,
            299,321,351,389,412,443,479,502,533,569,594,624,659,686,
            716,749,777,808,839,867,898,929,959,1019,1020,1051,1081,
            1112,1142,1173))
bucket = rep(0, length(positions$days))
cvar = 0
days = as.numeric(positions$days)
for (i in days) {
  cvar = cvar+1
  for (j in 1:length(mbuckets[,1])) {
    if(i<=mbuckets[j+1,1] & i>mbuckets[j,1]) {
      bucket[cvar] = mbuckets[j+1,1]
    }
  }
}

When I run the previous code instead of the reproducible one, I get the error described in the title:

Error in if (i <= mbuckets[j + 1, 1] & i > mbuckets[j, 1]) { : missing value where TRUE/FALSE needed

the bucket vector is set, but not correctly, since when I check if all the i are higher than j (condition within the loop to gather the value into bucket) it doesn't hold always like in the reproducible code:

> sum(positions$days<bucket)
[1] 5059
> sum(positions$days>=bucket)
[1] 2177

The problem is obviously in the variable days but I cannot find out what the problem is I tried to set the logical operator to && instead of & but it didn't work.

Aucun commentaire:

Enregistrer un commentaire