lundi 21 juin 2021

Vectorize loop codes

I have a number of nested for and if loops code which I am applying to different datasets which increase by 10 folds. The code works perfectly but takes very long to run (currently 10 days for a dataset with a length of 310). I am running the code on the Rstudio server on localhost. I would like to vectorize this but with the many nested for and if loops, I am even sure of where to start. I need to run 370 similar codes, hence my request for help.

Any pointers will be appreciated.

Here is the code (my apologies, the code is quiet lengthy):


library(e1071)
library(mcga)

ptm <- proc.time()

data<-read.csv("apr_data2.csv",header=TRUE)
n<-nrow(data)
frag<-data[(n-29):n,]
## subset daily data greater than zero
frag2<-frag[frag$V4>0.1,]

 ## fragments from subset data
frag3<-frag[,5:28]/frag[,4]

## daily data without corresponding sub-hourly data
data_2D = data$V4 
num2 = 1:length(data_2D)

## daily data with corresponding sub-hourly data
total<- frag$V4 
num = 1:length(total)

##wet or dry state indicator
status1<-ifelse(frag$V4>=0.1,1,0) 
status2<-ifelse(data$V4>=0.1,1,0)

## classes for sampling
break99<-c(0.1,1,10,25,Inf)

##preallocate space for disaggregated data
Disagg_data<-data.frame(matrix(nrow=length(data_2D),ncol=24)) 

##prealocate space for simulation runs
runs = 100
final<-matrix(NA,nrow=length(data_2D)*ncol(Disagg_data),ncol = runs)

## begin simulation runs
for (sim in 1:runs){

## put daily data with corresponding sub-daily data into classes
for (k in 2:(length(total)-1)){
    if (is.na(total[k])) next
    if (total[k]>0){
      for (i in 1:(length(break99)-1)){
        if (total[k]>=break99[i]&total[k]<break99[i+1]){
          oxt<-num[total>=break99[i]&total<break99[i+1]]
          oxt<-oxt[oxt!=1 & oxt!=length(total)]

}
}
}
}

## put daily data to be disaggregated into classes
for (t in 2:(length(data_2D)-1)){
    if (is.na(data_2D[t])) next
    if (data_2D[t]>0){
      for (i in 1:(length(break99)-1)){
        if (data_2D[t]>=break99[i]&data_2D[t]<break99[i+1]){
          oyt<-num2[data_2D>=break99[i]&data_2D<break99[i+1]]
          oyt<-oyt[oyt!=1 & oyt!=length(data_2D)]

}
}
}
}

## identify days with matching wetness 
## sample 2 and use crossover algorithm to create 2 new fragments
## add new fragments and sample one to disaggregate daily data
for (k in 2:(length(total)-1)){
for (t in 2:(length(data_2D)-1)){
    if (length(oxt[!is.na(oxt)])>1){
     if (length(oyt[!is.na(oyt)])>1){
      if (length(oxt[!is.na(oxt)&status1[oxt-1]==status1[k-1]&status1[oxt+1]==status1[k+1]])&length(oyt[!is.na(oyt)&status2[oyt-1]==status2[t-1]&status2[oyt+1]==status2[t+1]])>=2){
         index<-sample(oxt[!is.na(oxt)&status1[oxt-1]==status1[k-1]&status1[oxt+1]==status1[k+1]],2)
         v1<-frag[index[1],5:28]/frag[index[1],4]
         v2<-frag[index[2],5:28]/frag[index[2],4]
       cut<-sample(1:24,1)
        child<-OnePointCrossOverOnDoublesUsingBytes(unlist(v1),unlist(v2),cut)
        child1<-child[[1]]/sum(child[[1]]) 
        child2<-child[[2]]/sum(child[[2]])
          combine<-rbind(v1,v2,child1,child2)
          select_hourly_vector<-combine[sample(nrow(combine),1),]
          Disagg_data[t,1:24]<-data_2D[t]*select_hourly_vector

## when matched fragments is less than 2
      if (length(oxt[!is.na(oxt)&status1[oxt-1]==status1[k-1]&status1[oxt+1]==status1[k+1]])&length(oyt[!is.na(oyt)&status2[oyt-1]==status2[t-1]&status2[oyt+1]==status2[t+1]])==1){
         index<-sample(oxt[!is.na(oxt)&status1[oxt-1]==status1[k-1]&status1[oxt+1]==status1[k+1]],1)
         select_hourly_vector<-frag[index,5:28]/frag[index,4]
          Disagg_data[t,1:24]<-data_2D[t]*select_hourly_vector

## when there are no matched fragments, any fragment is sampled and used to disaggregated the daily data
   if (length(oxt[!is.na(oxt)&status1[oxt-1]==status1[k-1]&status1[oxt+1]==status1[k+1]])&length(oyt[!is.na(oyt)&status2[oyt-1]==status2[t-1]&status2[oyt+1]==status2[t+1]])==0){
       index<-sample(length(total),1)
       select_hourly_vector<-frag[index,5:28]/frag[index,4]
        Disagg_data[t,1:24]<-data_2D[t]*select_hourly_vector
}
}   
}
}
}
}
}

 final[,sim] = as.vector(t(Disagg_data))
}

write.csv(final,"onepoint apr 10yrs.csv")

proc.time() - ptm



Data: https://www.dropbox.com/s/9rhftd5xai2xu7q/apr_data2.csv?dl=0

Aucun commentaire:

Enregistrer un commentaire