jeudi 11 janvier 2018

How to create a new variable from (9) repeated values in R? Do I need loops?

Firstly, I apologize for the vagueness of the title. I have a dataset containing a dichotomous variable X, coded 0 and 1. v001 is the subject identifier, and the columns v1pc10le8 to v9pc10le8 hold the value of X at each of the nine visits. In addition, firstpc10 and lastpc10 are the first (baseline) and last measurements of X, respectively.

      v001 firstpc10 lastpc10 v1pc10le8 v2pc10le8 v3pc10le8 v4pc10le8 v5pc10le8 v6pc10le8 v7pc10le8 v8pc10le8 v9pc10le8
1473 28084         0        0         0      <NA>         0      <NA>      <NA>         0         0      <NA>      <NA>
1474 28089         0        0      <NA>      <NA>      <NA>         0      <NA>         0      <NA>      <NA>      <NA>
1475 28102         0        1      <NA>      <NA>         0         0         0         0         1      <NA>      <NA>
1476 28103         0        1      <NA>      <NA>      <NA>         0         0         0         0         1         1
1477 28119         0        0      <NA>      <NA>      <NA>         0      <NA>         0         0         0      <NA>
1478 28184         0        1      <NA>      <NA>         0      <NA>      <NA>         0      <NA>      <NA>         1
1479 28202         1        1      <NA>      <NA>         1      <NA>         0         0         0         1         1
1480 28211         0        0         0      <NA>         0         0      <NA>      <NA>      <NA>      <NA>      <NA>
1481 28212         0        1         0      <NA>      <NA>         1      <NA>      <NA>      <NA>      <NA>      <NA>
1482 28213         0        0      <NA>      <NA>         0      <NA>      <NA>         0      <NA>      <NA>      <NA>
1483 28214         0        0      <NA>      <NA>      <NA>         0         0         0      <NA>         1         0
1484 28215         0        0      <NA>      <NA>      <NA>         0      <NA>         0         0         0         0
1485 28232         0        1      <NA>      <NA>         0      <NA>         0         1      <NA>      <NA>      <NA>
1486 28244         1        1         1      <NA>      <NA>      <NA>         0         0         0         0         1
1487 28258         0        1      <NA>      <NA>      <NA>         0      <NA>         0         1      <NA>         1
1488 28281         0        1      <NA>      <NA>      <NA>         0         0         0         1      <NA>      <NA>
1489 28303         0        0         0      <NA>      <NA>      <NA>      <NA>         0         0         0      <NA>
1490 28337         0        1      <NA>      <NA>         0      <NA>      <NA>         0      <NA>         1      <NA>
1491 28355         1        1      <NA>      <NA>         1      <NA>         0      <NA>         0         1      <NA>
1492 29983         0        0      <NA>      <NA>      <NA>         0         0      <NA>         0         0         0

I want to ignore all the NA values and compute a new variable called "change" that takes the following values:

1 - if subjects were 0 at baseline and remained 0 throughout

2 - if subjects were 1 at baseline and remained 1 throughout

3 - if subjects were 1 at baseline and changed to 0 (and remained 0 throughout)

4 - if subjects were 0 at baseline and changed to 1 (and remained 1 throughout)

5 - if subjects fluctuated between values of 0 and 1 without a trend (e.g., subject #28214) - these are subjects who don't fit into any of the above 4 categories

I tried to do this with SPSS and R, but I am having huge difficulties and would greatly appreciate any help. (I have included the dput output from R below.)

Thank you!

structure(list(v001 = c(28084, 28089, 28102, 28103, 28119, 28184, 
28202, 28211, 28212, 28213, 28214, 28215, 28232, 28244, 28258, 
28281, 28303, 28337, 28355, 29983), firstpc10 = c(0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0), lastpc10 = c(0, 
0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0), v1pc10le8 = c(0, 
NA, NA, NA, NA, NA, NA, 0, 0, NA, NA, NA, NA, 1, NA, NA, 0, NA, 
NA, NA), v2pc10le8 = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), v3pc10le8 = c(0, NA, 0, NA, NA, 0, 1, 0, 
NA, 0, NA, NA, 0, NA, NA, NA, NA, 0, 1, NA), v4pc10le8 = c(NA, 
0, 0, 0, 0, NA, NA, 0, 1, NA, 0, 0, NA, NA, 0, 0, NA, NA, NA, 
0), v5pc10le8 = c(NA, NA, 0, 0, NA, NA, 0, NA, NA, NA, 0, NA, 
0, 0, NA, 0, NA, NA, 0, 0), v6pc10le8 = c(0, 0, 0, 0, 0, 0, 0, 
NA, NA, 0, 0, 0, 1, 0, 0, 0, 0, 0, NA, NA), v7pc10le8 = c(0, 
NA, 1, 0, 0, NA, 0, NA, NA, NA, NA, 0, NA, 0, 1, 1, 0, NA, 0, 
0), v8pc10le8 = c(NA, NA, NA, 1, 0, NA, 1, NA, NA, NA, 1, 0, 
NA, 0, NA, NA, 0, 1, 1, 0), v9pc10le8 = c(NA, NA, NA, 1, NA, 
1, 1, NA, NA, NA, 0, 0, NA, 1, 1, NA, NA, NA, NA, 0)), .Names = c("v001", 
"firstpc10", "lastpc10", "v1pc10le8", "v2pc10le8", "v3pc10le8", 
"v4pc10le8", "v5pc10le8", "v6pc10le8", "v7pc10le8", "v8pc10le8", 
"v9pc10le8"), row.names = 1473:1492, class = "data.frame")

Aucun commentaire:

Enregistrer un commentaire