jeudi 24 août 2017

change content of a column by using nested if else statement - R

I am working on the diamonds dataset:

> dput(head(diamonds_small, 100))
structure(list(carat = c(0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 
0.24, 0.26, 0.22, 0.23, 0.3, 0.23, 0.22, 0.31, 0.2, 0.32, 0.3, 
0.3, 0.3, 0.3, 0.3, 0.23, 0.23, 0.31, 0.31, 0.23, 0.24, 0.3, 
0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.31, 0.26, 
0.33, 0.33, 0.33, 0.26, 0.26, 0.32, 0.29, 0.32, 0.32, 0.25, 0.29, 
0.24, 0.23, 0.32, 0.22, 0.22, 0.3, 0.3, 0.3, 0.3, 0.3, 0.35, 
0.3, 0.3, 0.3, 0.42, 0.28, 0.32, 0.31, 0.31, 0.24, 0.24, 0.3, 
0.3, 0.3, 0.3, 0.26, 0.26, 0.26, 0.26, 0.26, 0.26, 0.26, 0.26, 
0.38, 0.26, 0.24, 0.24, 0.24, 0.24, 0.32, 0.7, 0.86, 0.7, 0.71, 
0.78, 0.7, 0.7, 0.96, 0.73, 0.8), cut = structure(c(5L, 4L, 2L, 
4L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 5L, 4L, 5L, 4L, 4L, 5L, 2L, 2L, 
3L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
2L, 2L, 2L, 3L, 5L, 5L, 5L, 2L, 2L, 2L, 4L, 3L, 2L, 3L, 3L, 3L, 
5L, 5L, 4L, 4L, 5L, 4L, 3L, 3L, 2L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 
3L, 4L, 4L, 3L, 3L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 
5L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 1L, 5L, 3L, 3L, 2L, 2L, 1L, 3L, 
4L), .Label = c("Fair", "Good", "Very Good", "Premium", "Ideal"
), class = c("ordered", "factor")), color = structure(c(2L, 2L, 
2L, 6L, 7L, 7L, 6L, 5L, 2L, 5L, 7L, 7L, 3L, 7L, 2L, 2L, 6L, 7L, 
7L, 7L, 6L, 2L, 5L, 7L, 7L, 4L, 6L, 7L, 1L, 3L, 3L, 3L, 2L, 2L, 
1L, 3L, 2L, 5L, 1L, 6L, 6L, 7L, 1L, 1L, 5L, 3L, 5L, 5L, 2L, 5L, 
3L, 4L, 6L, 2L, 1L, 6L, 7L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 6L, 4L, 
6L, 4L, 4L, 2L, 1L, 5L, 5L, 5L, 5L, 3L, 2L, 1L, 1L, 2L, 2L, 1L, 
2L, 6L, 2L, 4L, 5L, 5L, 5L, 6L, 2L, 2L, 4L, 2L, 4L, 2L, 3L, 3L, 
2L, 5L), .Label = c("D", "E", "F", "G", "H", "I", "J"), class = c("ordered", 
"factor")), clarity = structure(c(2L, 3L, 5L, 4L, 2L, 6L, 7L, 
3L, 4L, 5L, 3L, 5L, 3L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 5L, 
3L, 3L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 
2L, 2L, 3L, 4L, 5L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 5L, 3L, 4L, 4L, 
2L, 2L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 2L, 6L, 7L, 3L, 3L, 7L, 7L, 
3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 2L, 7L, 7L, 7L, 
7L, 6L, 3L, 3L, 2L, 4L, 4L, 2L, 4L, 5L, 2L, 3L, 3L), .Label = c("I1", 
"SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"), class = c("ordered", 
"factor")), depth = c(61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 
61.9, 65.1, 59.4, 64, 62.8, 60.4, 62.2, 60.2, 60.9, 62, 63.4, 
63.8, 62.7, 63.3, 63.8, 61, 59.4, 58.1, 60.4, 62.5, 62.2, 60.5, 
60.9, 60, 59.8, 60.7, 59.5, 61.9, 58.2, 64.1, 64, 60.8, 61.8, 
61.2, 61.1, 65.2, 58.4, 63.1, 62.4, 61.8, 63.8, 63.3, 60.7, 60.9, 
61.9, 60.9, 61.6, 59.3, 61, 59.3, 62.6, 63, 63.2, 60.9, 62.6, 
62.5, 62.1, 61.5, 61.4, 62, 63.3, 61.8, 60.7, 61.5, 63.1, 62.9, 
62.5, 63.7, 59.2, 59.9, 62.4, 62.8, 62.6, 63.4, 62.1, 62.9, 61.6, 
57.9, 62.3, 61.2, 60.8, 60.7, 62.9, 62.5, 55.1, 61.6, 62.4, 63.8, 
57.5, 59.4, 66.3, 61.6, 61.5), table = c(55, 61, 65, 58, 58, 
57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58, 54, 54, 56, 59, 56, 
55, 57, 62, 62, 58, 57, 57, 61, 57, 57, 57, 59, 58, 58, 59, 59, 
54, 59, 55, 56, 56, 56, 63, 56, 58, 55, 56, 60, 60, 61, 54, 55, 
58, 62, 59, 61, 57, 57, 55, 57, 59, 57, 56, 59, 56, 55.3, 57, 
58, 58, 60, 56, 59, 57, 57, 60, 58, 54, 60, 59, 59, 60, 58, 56, 
60, 59, 58, 59, 58, 58, 57, 69, 56, 57, 56, 58, 62, 62, 59, 58
), price = c(326L, 326L, 327L, 334L, 335L, 336L, 336L, 337L, 
337L, 338L, 339L, 340L, 342L, 344L, 345L, 345L, 348L, 351L, 351L, 
351L, 351L, 352L, 353L, 353L, 353L, 354L, 355L, 357L, 357L, 357L, 
402L, 402L, 402L, 402L, 402L, 402L, 402L, 402L, 403L, 403L, 403L, 
403L, 403L, 403L, 403L, 403L, 403L, 403L, 404L, 404L, 404L, 404L, 
404L, 404L, 404L, 405L, 405L, 405L, 405L, 405L, 552L, 552L, 552L, 
552L, 552L, 553L, 553L, 553L, 553L, 553L, 553L, 554L, 554L, 554L, 
554L, 554L, 554L, 554L, 554L, 554L, 554L, 554L, 554L, 554L, 554L, 
554L, 554L, 554L, 554L, 554L, 2757L, 2757L, 2757L, 2759L, 2759L, 
2759L, 2759L, 2759L, 2760L, 2760L), x = c(3.95, 3.89, 4.05, 4.2, 
4.34, 3.94, 3.95, 4.07, 3.87, 4, 4.25, 3.93, 3.88, 4.35, 3.79, 
4.38, 4.31, 4.23, 4.23, 4.21, 4.26, 3.85, 3.94, 4.39, 4.44, 3.97, 
3.97, 4.28, 3.96, 3.96, 4, 4.04, 3.97, 4.01, 3.92, 4.06, 3.83, 
4.29, 4.13, 4.49, 4.49, 4.49, 3.99, 4.19, 4.34, 4.24, 4.35, 4.36, 
4, 4.33, 4.02, 3.93, 4.45, 3.93, 3.91, 4.3, 4.43, 4.25, 4.28, 
4.25, 4.54, 4.23, 4.29, 4.3, 4.78, 4.19, 4.39, 4.33, 4.35, 4.01, 
3.97, 4.29, 4.28, 4.29, 4.28, 4.19, 4.15, 4.08, 4.01, 4.06, 4, 
4.03, 4.02, 4.65, 4.22, 3.95, 4.01, 4.02, 4.07, 4.35, 5.7, 6.45, 
5.7, 5.68, 5.81, 5.85, 5.71, 6.27, 5.77, 5.97), y = c(3.98, 3.84, 
4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.28, 3.9, 3.84, 
4.37, 3.75, 4.42, 4.34, 4.29, 4.26, 4.27, 4.3, 3.92, 3.96, 4.43, 
4.47, 4.01, 3.94, 4.3, 3.97, 3.99, 4.03, 4.06, 4.01, 4.06, 3.96, 
4.08, 3.85, 4.31, 4.16, 4.51, 4.5, 4.55, 4.02, 4.24, 4.37, 4.26, 
4.42, 4.38, 4.03, 4.37, 4.03, 3.95, 4.48, 3.89, 3.88, 4.33, 4.38, 
4.28, 4.32, 4.29, 4.59, 4.27, 4.32, 4.33, 4.84, 4.22, 4.42, 4.3, 
4.32, 4.03, 4, 4.27, 4.24, 4.25, 4.26, 4.22, 4.23, 4.13, 4.05, 
4.09, 4.04, 4.12, 4.06, 4.67, 4.25, 3.92, 3.96, 4, 4.04, 4.33, 
5.72, 6.33, 5.67, 5.73, 5.85, 5.9, 5.76, 5.95, 5.78, 5.93), z = c(2.43, 
2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.73, 2.46, 
2.33, 2.71, 2.27, 2.68, 2.68, 2.7, 2.71, 2.66, 2.71, 2.48, 2.41, 
2.62, 2.59, 2.41, 2.47, 2.67, 2.4, 2.42, 2.41, 2.42, 2.42, 2.4, 
2.44, 2.37, 2.46, 2.75, 2.52, 2.78, 2.75, 2.76, 2.61, 2.46, 2.75, 
2.65, 2.71, 2.79, 2.54, 2.64, 2.45, 2.44, 2.72, 2.41, 2.31, 2.63, 
2.61, 2.67, 2.71, 2.7, 2.78, 2.66, 2.69, 2.68, 2.96, 2.58, 2.73, 
2.73, 2.68, 2.44, 2.45, 2.7, 2.68, 2.67, 2.72, 2.49, 2.51, 2.56, 
2.53, 2.55, 2.55, 2.53, 2.54, 2.87, 2.45, 2.45, 2.44, 2.44, 2.46, 
2.73, 3.57, 3.52, 3.5, 3.56, 3.72, 3.38, 3.4, 4.07, 3.56, 3.66
)), .Names = c("carat", "cut", "color", "clarity", "depth", "table", 
"price", "x", "y", "z"), row.names = c(NA, -100L), class = c("tbl_df", 
"tbl", "data.frame"))

I want to change the column cut such that all observations that contain 'ideal' or 'premium' will have 'above average', all observations with 'good' and 'fair' will have 'below average. I have tried two codes, but none of them work. Here they are:

cut_new <- 
  ifelse(diamonds_small$cut == "Ideal", "Above average", 
      ifelse(diamonds_small$cut == "Premium", "Above average", 
          ifelse(diamonds_small$cut == "Very Good", "Very Good",
              ifelse(diamonds_small$cut == "Good", "Below average", 
                  ifelse(diamonds_small$cut == "Fair", "Below average")))))


if ( diamonds_small$cut = "Ideal") {
  diamonds_small$cut == "Above average"
} else if ( diamonds_small$cut == "Premium") {
  diamonds_small$cut == "Above average"
} else if ( diamonds_small$cut == "Good") {
  diamonds_small$cut == "Below average"
} else if ( diamonds_small$cut == "Fair") {
  diamonds_small$cut == "Below average"  
} else
  diamonds_small$cut == "other"

Any tips, comments will be greatly appriciated

Aucun commentaire:

Enregistrer un commentaire