Hello! I have this part of a code that takes so much time to run because rna_data its too big. I want to know how could i use the parallel and foreach package to make it faster.I've search on the internet how to use it, but im not sure if this is the correct way:
install.packages('doParallel')
library(doParallel)
num_cores <- detectCores()-1
cl <- makeCluster(num_cores)
registerDoParallel(cl)
foreach(i in 1:nrow(rna_data),.combine = c) %dopar%
CODE
stopCluster(cl)
So the complete code would look like this:
install.packages('doParallel')
library(doParallel)
num_cores <- detectCores()-1
cl <- makeCluster(num_cores)
registerDoParallel(cl)
foreach(i in 1:nrow(rna_data),.combine = c) %dopar%
{fila = rna_data[i,]
muestra = fila$submitted_sample_id
muestra = gsub("\n", "",fixed = T, muestra)
muestra = sub(".*-", "", strsplit(muestra, "-")[[1]][4])
muestra = gsub("[A-Z]", "", muestra)
as.integer(muestra)
icgc_donor_id = as.character(fila$icgc_donor_id)
if (muestra < 10){ #Muestra mutada
datos_muestras_tumorales[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
if (as.character(tabla_status_donor[icgc_donor_id, "disease_status_last_followup"]) == "complete remission"){
datos_muestras_remission[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
}
else if (as.character(tabla_status_donor[icgc_donor_id, "disease_status_last_followup"]) == "progression"){
datos_muestras_progression[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
}
if (as.character(tabla_status_donor[icgc_donor_id, "donor_vital_status"]) == "alive"){
datos_donor_alive[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
}
else if (as.character(tabla_status_donor[icgc_donor_id, "donor_vital_status"]) == "deceased"){
datos_donor_deceased[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
}
}
else if (muestra > 10){ #Muestra sana
datos_muestras_sanas[as.character(fila$HGNC_symbol), icgc_donor_id] = as.double(fila$normalized_read_count)
}
}
stopCluster(cl)
Aucun commentaire:
Enregistrer un commentaire