mardi 7 juillet 2020

Check for multiple NA columns and return another column in R

I have a dataframe that has multiple columns named as "avg_metric", "wkday_avg_metric", "event_avg_metric" and "monthly_avg_metric", in which "metric" consists of multiple metrics with these calculations (orders, revenue, etc). I have to check for multiple columns if their rows have NAs and replace them with a row from another column. For that, I created a function that does the same verification for the column "metric" I specify. The thing is that I'm getting the same value for the entire new column that I'm creating, which should not be the case.

I added below an example_fixed on what should be the outcome.

Is there an easier way of doing that? Or am I lacking some logic in the function?

Tks.

library(tidyverse)

(example <- tibble(avg_visits = c(5028, NA, NA, NA),
                   wkday_avg_visits = c(1234, 4355, NA, NA),
                   event_avg_visits = c(51271, 59212, 98773, NA),
                   monthly_avg_visits = c(5028, 5263, 6950, 8902)))
#> # A tibble: 4 x 4
#>   avg_visits wkday_avg_visits event_avg_visits monthly_avg_visits
#>        <dbl>            <dbl>            <dbl>              <dbl>
#> 1       5028             1234            51271               5028
#> 2         NA             4355            59212               5263
#> 3         NA               NA            98773               6950
#> 4         NA               NA               NA               8902

subs_metric <- function(data, metric) {
  
  avg <- paste0("avg_", metric)
  wkday_avg <- paste0("wkday_avg_", metric)
  event_avg <- paste0("event_avg_", metric)
  monthly_avg <- paste0("monthly_avg_", metric)
  
  for (i in nrow(data)) {
    
      value <- if (is.na(data[[avg]][i]) & is.na(data[[wkday_avg]][i]) & is.na(data[[event_avg]][i])) {
        data[[monthly_avg]][i]
      } else if (is.na(data[[avg]][i]) & is.na(data[[wkday_avg]][i])) {
        data[[event_avg]][i]
      } else if (is.na(data[[avg]][i])) {
        data[[wkday_avg]][i]
      } else {
        data[[avg]][i]
      }
    
      return(value) 
  }
}
  

example %>% 
  mutate(avg_visits_new = subs_metric(., "visits"))
#> # A tibble: 4 x 5
#>   avg_visits wkday_avg_visits event_avg_visits monthly_avg_visits avg_visits_new
#>        <dbl>            <dbl>            <dbl>              <dbl>          <dbl>
#> 1       5028             1234            51271               5028           8902
#> 2         NA             4355            59212               5263           8902
#> 3         NA               NA            98773               6950           8902
#> 4         NA               NA               NA               8902           8902

(example_fixed <- tibble(avg_visits = c(5028, NA, NA, NA),
                   wkday_avg_visits = c(1234, 4355, NA, NA),
                   event_avg_visits = c(51271, 59212, 98773, NA),
                   monthly_avg_visits = c(5028, 5263, 6950, 8902),
                   avg_visits_new = c(5028, 4355, 98773, 8902)))
#> # A tibble: 4 x 5
#>   avg_visits wkday_avg_visits event_avg_visits monthly_avg_visits avg_visits_new
#>        <dbl>            <dbl>            <dbl>              <dbl>          <dbl>
#> 1       5028             1234            51271               5028           5028
#> 2         NA             4355            59212               5263           4355
#> 3         NA               NA            98773               6950          98773
#> 4         NA               NA               NA               8902           8902

Created on 2020-07-07 by the reprex package (v0.3.0)

Aucun commentaire:

Enregistrer un commentaire