samedi 16 mars 2019

R nested for loop to fill in blank variables until reaching a specific timestamp

I'm working with a dataset that has timestamps(POSIXct formatted) for every half hour (but there are duplicates for some time chunks, and not the same number of duplicates for each). There are three columns A, B1, B2 that note the location of a ship in a harbor. There are two more columns with the timestamp for arrival and departure of each ship. I want to carry forward the A's, B1's, and B2's until reaching the departure timestamp to actually denote how long the ships are present not just when they arrive.

Here's what the table looks like (with many many more rows... 2 million...)

           [Timestamp]           [A]       [B1]        [B2]         [Arrival]             [Departure]        
      [1,] "2018-04-19 08:00:00" "A"        NA          NA          "2018-04-19 08:00:00" "2018-04-20 06:00:00"
      [2,] "2018-04-29 07:00:00" "A"        NA          NA          "2018-04-29 07:00:00" "2018-04-29 15:00:00"
      [3,] "2018-04-30 08:00:00" "A"        NA          NA          "2018-04-30 08:00:00" "2018-04-30 18:00:00"
      [4,] "2018-05-11 08:00:00" "A"        NA          NA          "2018-05-11 08:00:00" "2018-05-11 17:00:00"
      [5,] "2018-05-14 08:00:00" "A"        NA          NA          "2018-05-14 08:00:00" "2018-05-14 18:00:00"
      [6,] "2018-05-18 08:00:00" "A"        NA          NA          "2018-05-18 08:00:00" "2018-05-18 17:00:00"
      [7,] "2018-05-20 07:00:00" NA         "B1"        NA          "2018-05-20 07:00:00" "2018-05-20 17:00:00"
      [8,] "2018-05-20 08:00:00" "A"        NA          NA          "2018-05-20 08:00:00" "2018-05-20 17:00:00"
      [9,] "2018-05-22 07:00:00" "A"        NA          NA          "2018-05-22 07:00:00" "2018-05-22 22:00:00"
     [10,] "2018-05-27 07:00:00" "A"        NA          NA          "2018-05-27 07:00:00" "2018-05-27 15:00:00"
     [11,] "2018-06-01 00:00:00" NA         NA          NA          NA                    NA                   
     [12,] "2018-06-01 00:30:00" NA         NA          NA          NA                    NA                   
     [13,] "2018-06-01 01:00:00" NA         NA          NA          NA                    NA                   
     [14,] "2018-06-01 01:30:00" NA         NA          NA          NA                    NA                   
     [15,] "2018-06-01 02:00:00" NA         NA          NA          NA                    NA                   
     [16,] "2018-06-01 02:30:00" NA         NA          NA          NA                    NA                   
     [17,] "2018-06-01 03:00:00" NA         NA          NA          NA                    NA                   
     [18,] "2018-06-01 03:30:00" NA         NA          NA          NA                    NA                   
     [19,] "2018-06-01 04:00:00" NA         NA          NA          NA                    NA                   
     [20,] "2018-06-01 04:30:00" NA         NA          NA          NA                    NA                   
     [21,] "2018-06-01 05:00:00" NA         NA          NA          NA                    NA                   
     [22,] "2018-06-01 05:30:00" NA         NA          NA          NA                    NA                   
     [23,] "2018-06-01 06:00:00" NA         NA          NA          NA                    NA                   
     [24,] "2018-06-01 06:30:00" NA         NA          NA          NA                    NA                   
     [25,] "2018-06-01 07:00:00" NA         "B1"        NA          "2018-06-01 07:00:00" "2018-06-01 22:00:00"
     [26,] "2018-06-01 07:30:00" NA          NA         NA          NA                    NA                   
     [27,] "2018-06-01 08:00:00" "A"         NA         NA          "2018-06-01 08:00:00" "2018-06-01 17:00:00"
     [28,] "2018-06-01 08:30:00"  NA         NA         NA          NA                    NA                   
     [29,] "2018-06-01 09:00:00"  NA         NA         NA          NA                    NA                   
     [30,] "2018-06-01 09:30:00"  NA         NA         NA          NA                    NA                   
     [31,] "2018-06-01 10:00:00"  NA         NA         NA          NA                    NA                   
     [32,] "2018-06-01 10:30:00"  NA         NA         NA          NA                    NA                   
     [33,] "2018-06-01 11:00:00"  NA         NA         NA          NA                    NA                   
     [34,] "2018-06-01 11:30:00"  NA         NA         NA          NA                    NA                   
     [35,] "2018-06-01 12:00:00"  NA         NA         NA          NA                    NA                   
     [36,] "2018-06-01 12:30:00"  NA         NA         NA          NA                    NA                   
     [37,] "2018-06-01 13:00:00"  NA         NA         NA          NA                    NA                   
     [38,] "2018-06-01 13:30:00"  NA         NA         NA          NA                    NA                   
     [39,] "2018-06-01 14:00:00"  NA         NA         NA          NA                    NA                   
     [40,] "2018-06-01 14:30:00"  NA         NA         NA          NA                    NA                   
     [41,] "2018-06-01 15:00:00"  NA         NA         NA          NA                    NA                   
     [42,] "2018-06-01 15:30:00"  NA         NA         NA          NA                    NA                   
     [43,] "2018-06-01 16:00:00"  NA         NA         NA          NA                    NA                   
     [44,] "2018-06-01 16:30:00"  NA         NA         NA          NA                    NA                   
     [45,] "2018-06-01 17:00:00"  NA         NA         NA          NA                    NA                   
     [46,] "2018-06-01 17:30:00"  NA         NA         NA          NA                    NA                   
     [47,] "2018-06-01 18:00:00"  NA         NA         NA          NA                    NA                   
     [48,] "2018-06-01 18:30:00"  NA         NA         NA          NA                    NA                   
     [49,] "2018-06-01 19:00:00"  NA         NA         NA          NA                    NA                   
     [50,] "2018-06-01 19:30:00"  NA         NA         NA          NA                    NA                   
     [51,] "2018-06-01 20:00:00"  NA         NA         NA          NA                    NA                   
     [52,] "2018-06-01 20:30:00"  NA         NA         NA          NA                    NA                   
     [53,] "2018-06-01 21:00:00"  NA         NA         NA          NA                    NA                   
     [54,] "2018-06-01 21:30:00"  NA         NA         NA          NA                    NA                   
     [55,] "2018-06-01 22:00:00"  NA         NA         NA          NA                    NA                   
     [56,] "2018-06-01 22:30:00" NA         NA          NA          NA                    NA                   
     [57,] "2018-06-01 23:00:00" NA         NA          NA          NA                    NA 

And this is what I'm looking for:

           [Timestamp]           [A]        [B1]       [B2]        [Arrival]               [Departure]        
      [1,] "2018-04-19 08:00:00" "A"        NA          NA          "2018-04-19 08:00:00" "2018-04-20 06:00:00"
      [2,] "2018-04-29 07:00:00" "A"        NA          NA          "2018-04-29 07:00:00" "2018-04-29 15:00:00"
      [3,] "2018-04-30 08:00:00" "A"        NA          NA          "2018-04-30 08:00:00" "2018-04-30 18:00:00"
      [4,] "2018-05-11 08:00:00" "A"        NA          NA          "2018-05-11 08:00:00" "2018-05-11 17:00:00"
      [5,] "2018-05-14 08:00:00" "A"        NA          NA          "2018-05-14 08:00:00" "2018-05-14 18:00:00"
      [6,] "2018-05-18 08:00:00" "A"        NA          NA          "2018-05-18 08:00:00" "2018-05-18 17:00:00"
      [7,] "2018-05-20 07:00:00" NA         "B1"        NA          "2018-05-20 07:00:00" "2018-05-20 17:00:00"
      [8,] "2018-05-20 08:00:00" "A"        "B1"        NA          "2018-05-20 08:00:00" "2018-05-20 17:00:00"
      [9,] "2018-05-22 07:00:00" "A"        NA          NA          "2018-05-22 07:00:00" "2018-05-22 22:00:00"
     [10,] "2018-05-27 07:00:00" "A"        NA          NA          "2018-05-27 07:00:00" "2018-05-27 15:00:00"
     [11,] "2018-06-01 00:00:00" NA         NA          NA          NA                    NA                   
     [12,] "2018-06-01 00:30:00" NA         NA          NA          NA                    NA                   
     [13,] "2018-06-01 01:00:00" NA         NA          NA          NA                    NA                   
     [14,] "2018-06-01 01:30:00" NA         NA          NA          NA                    NA                   
     [15,] "2018-06-01 02:00:00" NA         NA          NA          NA                    NA                   
     [16,] "2018-06-01 02:30:00" NA         NA          NA          NA                    NA                   
     [17,] "2018-06-01 03:00:00" NA         NA          NA          NA                    NA                   
     [18,] "2018-06-01 03:30:00" NA         NA          NA          NA                    NA                   
     [19,] "2018-06-01 04:00:00" NA         NA          NA          NA                    NA                   
     [20,] "2018-06-01 04:30:00" NA         NA          NA          NA                    NA                   
     [21,] "2018-06-01 05:00:00" NA         NA          NA          NA                    NA                   
     [22,] "2018-06-01 05:30:00" NA         NA          NA          NA                    NA                   
     [23,] "2018-06-01 06:00:00" NA         NA          NA          NA                    NA                   
     [24,] "2018-06-01 06:30:00" NA         NA          NA          NA                    NA                   
     [25,] "2018-06-01 07:00:00" NA         "B1"        NA          "2018-06-01 07:00:00" "2018-06-01 22:00:00"
     [26,] "2018-06-01 07:30:00" NA         "B1"        NA          NA                    NA                   
     [27,] "2018-06-01 08:00:00" "A"        "B1"        NA          "2018-06-01 08:00:00" "2018-06-01 17:00:00"
     [28,] "2018-06-01 08:30:00" "A"        "B1"        NA          NA                    NA                   
     [29,] "2018-06-01 09:00:00" "A"        "B1"        NA          NA                    NA                   
     [30,] "2018-06-01 09:30:00" "A"        "B1"        NA          NA                    NA                   
     [31,] "2018-06-01 10:00:00" "A"        "B1"        NA          NA                    NA                   
     [32,] "2018-06-01 10:30:00" "A"        "B1"        NA          NA                    NA                   
     [33,] "2018-06-01 11:00:00" "A"        "B1"        NA          NA                    NA                   
     [34,] "2018-06-01 11:30:00" "A"        "B1"        NA          NA                    NA                   
     [35,] "2018-06-01 12:00:00" "A"        "B1"        NA          NA                    NA                   
     [36,] "2018-06-01 12:30:00" "A"        "B1"        NA          NA                    NA                   
     [37,] "2018-06-01 13:00:00" "A"        "B1"        NA          NA                    NA                   
     [38,] "2018-06-01 13:30:00" "A"        "B1"        NA          NA                    NA                   
     [39,] "2018-06-01 14:00:00" "A"        "B1"        NA          NA                    NA                   
     [40,] "2018-06-01 14:30:00" "A"        "B1"        NA          NA                    NA                   
     [41,] "2018-06-01 15:00:00" "A"        "B1"        NA          NA                    NA                   
     [42,] "2018-06-01 15:30:00" "A"        "B1"        NA          NA                    NA                   
     [43,] "2018-06-01 16:00:00" "A"        "B1"        NA          NA                    NA                   
     [44,] "2018-06-01 16:30:00" "A"        "B1"        NA          NA                    NA                   
     [45,] "2018-06-01 17:00:00" "A"        "B1"        NA          NA                    NA                   
     [46,] "2018-06-01 17:30:00" NA         "B1"        NA          NA                    NA                   
     [47,] "2018-06-01 18:00:00" NA         "B1"        NA          NA                    NA                   
     [48,] "2018-06-01 18:30:00" NA         "B1"        NA          NA                    NA                   
     [49,] "2018-06-01 19:00:00" NA         "B1"        NA          NA                    NA                   
     [50,] "2018-06-01 19:30:00" NA         "B1"        NA          NA                    NA                   
     [51,] "2018-06-01 20:00:00" NA         "B1"        NA          NA                    NA                   
     [52,] "2018-06-01 20:30:00" NA         "B1"        NA          NA                    NA                   
     [53,] "2018-06-01 21:00:00" NA         "B1"        NA          NA                    NA                   
     [54,] "2018-06-01 21:30:00" NA         "B1"        NA          NA                    NA                   
     [55,] "2018-06-01 22:00:00" NA         "B1"        NA          NA                    NA                   
     [56,] "2018-06-01 22:30:00" NA          NA         NA          NA                    NA                   
     [57,] "2018-06-01 23:00:00" NA          NA         NA          NA                    NA 

Here's the data:

dput(loopdata[1:57,])
structure(list(FinalTimestamp = structure(c(1524142800, 1525003200, 
1525093200, 1526043600, 1526302800, 1526648400, 1526817600, 1526821200, 
1526990400, 1527422400, 1527829200, 1527831000, 1527832800, 1527834600, 
1527836400, 1527838200, 1527840000, 1527841800, 1527843600, 1527845400, 
1527847200, 1527849000, 1527850800, 1527852600, 1527854400, 1527856200, 
1527858000, 1527859800, 1527861600, 1527863400, 1527865200, 1527867000, 
1527868800, 1527870600, 1527872400, 1527874200, 1527876000, 1527877800, 
1527879600, 1527881400, 1527883200, 1527885000, 1527886800, 1527888600, 
1527890400, 1527892200, 1527894000, 1527895800, 1527897600, 1527899400, 
1527901200, 1527903000, 1527904800, 1527906600, 1527908400, 1527910200, 
1527912000), class = c("POSIXct", "POSIXt"), tzone = "EST"), 
    AnchorageA = c("A", "A", "A", "A", "A", "A", NA, "A", "A", 
    "A", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, "A", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA), AnchorageB1 = c(NA, NA, NA, NA, NA, 
    NA, "B1", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, "B1", NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), AnchorageB2 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_
    ), TimestFormArr = structure(c(1524139200, 1524999600, 1525089600, 
    1526040000, 1526299200, 1526644800, 1526814000, 1526817600, 
    1526986800, 1527418800, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, 1527850800, NA, 1527854400, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), class = c("POSIXct", 
    "POSIXt"), tzone = ""), TimestFormDep = structure(c(1524218400, 
    1525028400, 1525125600, 1526072400, 1526335200, 1526677200, 
    1526850000, 1526850000, 1527040800, 1527447600, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1527904800, NA, 
    1527886800, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("FinalTimestamp", 
"AnchorageA", "AnchorageB1", "AnchorageB2", "TimestFormArr", 
"TimestFormDep"), row.names = c(NA, 57L), class = "data.frame")

Right now this is what I have for code to try to achieve this:

lastdate = 1
for(i in 1:length(loopdata$Timestamp))  
{
  if(i%%1000==0) print(i)
  if(!is.na(loopdata$Arrival[i]))
  {lastdate=i}
  if(loopdata$Timestamp[i] >= loopdata$Arrival[lastdate] & 
     loopdata$Timestamp[i] <= loopdata$Departure[lastdate])
  {loopdata[i,2:4]=loopdata[lastdate,2:4]}
}

The above code runs but it doesn't work. I usually stop it after 5,000 rows to check it (hence the print(i)) and there are no error messages. It carries forward the A's but it won't carry forward the B1's or B2's. Is this because it resets again because the first B1 is so quickly followed by an A? I've also tried using the same loop code with specific arrival and departure timestamps for B1 and B2 with an error message (Help! Thank you!

Aucun commentaire:

Enregistrer un commentaire