lundi 6 novembre 2017

Parallel elseif in r

Hello! I have this part of a code that takes so much time to run because rna_data its too big. I want to know how could i use the parallel and foreach package to make it faster.I've search on the internet how to use it, but im not sure if this is the correct way:

install.packages('doParallel')  
library(doParallel)  
num_cores <- detectCores()-1
cl <- makeCluster(num_cores)
registerDoParallel(cl)

foreach(i in 1:nrow(rna_data),.combine = c) %dopar%
CODE
stopCluster(cl)

So the complete code would look like this:

install.packages('doParallel')  
library(doParallel)  
num_cores <- detectCores()-1
cl <- makeCluster(num_cores)
registerDoParallel(cl)

foreach(i in 1:nrow(rna_data),.combine = c) %dopar%
 {fila = rna_data[i,]
  muestra = fila$submitted_sample_id
  muestra = gsub("\n", "",fixed = T, muestra)
  muestra = sub(".*-", "", strsplit(muestra, "-")[[1]][4])
  muestra = gsub("[A-Z]", "", muestra)
  as.integer(muestra)

  icgc_donor_id = as.character(fila$icgc_donor_id)

  if (muestra < 10){        #Muestra mutada
    datos_muestras_tumorales[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)

    if (as.character(tabla_status_donor[icgc_donor_id, "disease_status_last_followup"]) == "complete remission"){
      datos_muestras_remission[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
   }
    else if (as.character(tabla_status_donor[icgc_donor_id, "disease_status_last_followup"]) == "progression"){
      datos_muestras_progression[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
    }

    if (as.character(tabla_status_donor[icgc_donor_id, "donor_vital_status"]) == "alive"){
  datos_donor_alive[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
    }
    else if (as.character(tabla_status_donor[icgc_donor_id, "donor_vital_status"]) == "deceased"){
  datos_donor_deceased[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
    }

  }
  else if (muestra > 10){        #Muestra sana
datos_muestras_sanas[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
  }
}

stopCluster(cl)

Aucun commentaire:

Enregistrer un commentaire