lundi 5 juillet 2021

R - Triple Loop with if statement overjumps wrong lines

I am trying to perform a triple loop that contains an if-statement. Unfortunately, there must be a small mistake in my code, because it only gives me half of the result.

My dataframe is down below. I have 3 columns (ICD, Variables, Condition) that I am looping over. My aim is to create subdataframe out of my testframe that are containing all possible combinations out of this 3 coloums, grouped in that 1 subframe. So basically the subframe should have all entries, that have the same ICD, Variable and Condition.

Then I want to create another dataframe (data_list), in which I want to summarize certain information of this subdataframe. Each row in the data_list should represent the information of one subdataframe.

To aviod dealing with empty dataframes or dataframes that are not of interest (Correlations smaller than 0.3 or higher -0.3 I added an if-statement.

I have the problem, that is giving me a correct output, but not the full output. In this case I get Oz1 and Oz2, but SON_PC4 and Year_PC5 are missing. Does anyone have an idea where my code is wrong?

vari_list = sort(unique(testframe$Variables))
icd_list = sort(unique(testframe$ICD))
condi_list = sort(unique(testframe$Conditions))

data_list = data.frame(matrix(NA, nrow = 0,ncol = 15))
colnames(data_list) = c("ICD", "Variable", "Condition", "Corr_max", "Corr_min","Corr_mean", "NCor", "Corr_max_lag", "Corr_min_lag", "Gen_Max", "GenMin", "AgeMax", "AgeMin", "LocMax", "LocMin")

for (a in 1:length(icd_list)) {
  for (b in 1:length(vari_list)) {
    for (c in 1:length(condi_list)) {
      b1 = paste0("######### Working on - ", icd_list[a])
      b2 = paste(b1, vari_list[b], sep = "+")
      b3 = paste(b2, condi_list[c], sep = "+")
      print(b3)
      #############################
      illness = icd_list[a]
      vari = vari_list[b]
      condi = condi_list[c]
      
      testframe_select = testframe[testframe$Variables == vari ,]
      testframe_select = testframe_select [testframe_select $ICD == illness ,]
      testframe_select = testframe_select [testframe_select $Conditions == condi ,]
      
      rownames(testframe_select) = NULL
      
      if (nrow(testframe_select )<1) next
      if (abs(max(testframe_select $Correlations)) <= 0.3) next
      
      Platz = nrow(data_list)
  
      max1 = testframe_select [which.max(testframe_select $Correlations),]
      min1 = testframe_select [which.min(testframe_select $Correlations),]
      
      data_list[Platz+1,1] = max1$ICD
      data_list[Platz+1,2] = max1$Variables
      data_list[Platz+1,3] = max1$Conditions
      data_list[Platz+1,4] = max1$Correlations
      data_list[Platz+1,5] = min1$Correlations
      data_list[Platz+1,6] = round(mean(testframe_select$Correlations),2)
      data_list[Platz+1,7] = nrow(testframe_select)
      data_list[Platz+1,8] = max1$Lag
      data_list[Platz+1,9] = min1$Lag
      data_list[Platz+1,10] = max1$Gender
      data_list[Platz+1,11] = min1$Gender
      data_list[Platz+1,12] = max1$Age
      data_list[Platz+1,13] = min1$Age
      data_list[Platz+1,14] = max1$Loc
      data_list[Platz+1,15] = min1$Loc

    }}}

My example dataframe

structure(list(Variables = c("Oz1", "Oz1", 
"Oz2", "Oz2", "Oz1", 
"Oz1", "SON_PC4", "SON_PC4", "Oz2", 
"Oz2", "SON_PC4", "SON_PC4", "Oz3", 
"Oz4", "PM3", "Year_PC7", 
"Oz3", "Oz4", "PM3", 
"Year_PC7", "PM1", "Year_PC7", "PM2", 
"Year_PC7", "PM1"), ICD = c("P35-P39", "P35-P39", 
"P35-P39", "P35-P39", "P35-P39", "P35-P39", "P35-P39", "P35-P39", 
"P35-P39", "P35-P39", "P35-P39", "P35-P39", "P35-P39", "P35-P39", 
"P35-P39", "P50-P61", "P35-P39", "P35-P39", "P35-P39", "P50-P61", 
"P35-P39", "P50-P61", "P35-P39", "P50-P61", "P35-P39"), Correlations = c(-0.37, 
-0.37, -0.35, -0.35, -0.34, -0.34, -0.31, -0.31, -0.31, -0.31, 
-0.28, -0.28, -0.28, -0.28, -0.28, -0.28, -0.28, -0.28, -0.28, 
-0.28, -0.27, -0.27, -0.27, -0.27, -0.27), pvalue = c(0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0), Days = c("184", "184", "184", "184", "184", "184", "182", 
"182", "184", "184", "182", "182", "184", "184", "184", "184", 
"184", "184", "184", "184", "184", "184", "184", "184", "184"
), Cases = c("125", "125", "125", "125", "107", "107", "107", 
"107", "107", "107", "123", "123", "125", "125", "125", "127", 
"125", "125", "125", "127", "107", "112", "107", "112", "125"
), Age = c("indifferent", "Baby", "indifferent", "Baby", "indifferent", 
"Baby", "indifferent", "Baby", "indifferent", "Baby", "indifferent", 
"Baby", "indifferent", "indifferent", "indifferent", "indifferent", 
"Baby", "Baby", "Baby", "Baby", "indifferent", "indifferent", 
"Baby", "Baby", "indifferent"), Gender = c("indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent"), Loc = c("indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent", "indifferent", 
"indifferent", "indifferent", "indifferent", "indifferent"), 
    Conditions = c("JJA", "JJA", "JJA", "JJA", "JJA", "JJA", 
    "SON", "SON", "JJA", "JJA", "SON", "SON", "JJA", "JJA", "JJA", 
    "JJA", "JJA", "JJA", "JJA", "JJA", "JJA", "JJA", "JJA", "JJA", 
    "JJA"), Lag = c("L7", "L7", "L7", "L7", "L6", "L6", "L5", 
    "L5", "L6", "L6", "L6", "L6", "L7", "L7", "L7", "L7", "L7", 
    "L7", "L7", "L7", "L6", "L6", "L6", "L6", "L7")), row.names = c(NA, 
25L), class = "data.frame")

Aucun commentaire:

Enregistrer un commentaire