mercredi 19 juin 2019

Vectorize loop with if statement over dataframe and lists

I have a dataframe of text documents:

structure(list(
rowname = c("1", "2", "3", "4"), 
Datetime = c("2017-06-19", "2017-04-19", "2017-11-17", "2017-11-03"), 
content = c("aren t you absolut fed up with ", "the line between tax", "there ar peopl you ve", "ivan putrov 37 becam "), 
word_count = c(806L, 2808L, 741L, 766L), 
split = list(c("aren", t", "you", "absolut", "fed", "up", "with"), c("the", "line", "between", "tax"), c("there", "ar", "peopl", "you", "ve"), c("ivan", "putrov", "37", "becam")), 
allNOT = list(c(23L, 46L, 54L, 62L, 93L, 104L, 113L, 277L, 387L, 401L, 411L, 439L, 492L, 526L, 653L, 749L), c(59L, 74L, 96L, 160L, 368L, 588L, 764L, 813L, 819L, 949L, 955L, 1276L, 1382L, 1463L, 
1478L, 1500L, 1564L, 1772L, 1992L, 2168L, 2217L, 2223L, 2353L, 2359L, 2680L, 2786L), c(6L, 95L, 686L), c(107L, 372L, 414L, 627L, 643L)), 
allSTOPS = list(c(3L, 6L, 7L, 8L, 9L, 11L, 12L, 17L, 18L, 21L, 22L, 23L, 24L, 26L, 27L, 29L, 30L, 33L, 34L, 35L, 39L, 41L, 43L, 45L, 48L, 49L, 51L, 53L, 56L, 59L, 61L, 63L, 
64L, 68L, 70L, 73L, 74L, 75L, 78L, 80L, 82L, 85L, 87L, 93L, 96L, 97L, 99L, 100L, 102L, 103L, 104L, 106L, 107L, 108L, 110L, 112L, 113L, 115L, 119L, 122L, 124L, 127L, 129L, 130L, 132L, 135L, 136L, 
138L, 140L, 142L, 143L, 144L, 146L, 149L, 150L, 151L, 152L, 153L, 155L, 157L, 158L, 160L, 161L, 162L, 165L, 168L, 170L, 176L, 177L, 178L, 180L, 183L, 188L, 189L, 195L, 196L, 199L, 201L, 203L, 206L, 
207L, 208L, 209L, 210L, 212L, 213L, 214L, 215L, 218L, 219L, 220L, 222L, 223L, 224L, 226L, 228L, 231L, 234L, 236L, 237L, 238L, 240L, 241L, 243L, 245L, 249L, 251L, 252L, 253L, 254L, 258L, 260L, 264L, 
267L, 270L, 271L, 273L, 275L, 276L, 277L, 278L, 279L, 283L, 286L, 290L, 292L, 294L, 295L, 296L, 297L, 301L, 302L, 304L, 308L, 310L, 312L, 315L, 316L, 318L, 320L, 322L, 324L, 325L, 326L, 327L, 330L, 
331L, 334L, 337L, 338L, 340L, 342L, 345L, 346L, 348L, 350L, 351L, 353L, 355L, 356L, 359L, 364L, 365L, 366L, 369L, 371L, 372L, 375L, 377L, 378L, 380L, 381L, 382L, 385L, 387L, 389L, 390L, 392L, 394L, 
395L, 396L, 399L, 400L, 401L, 405L, 407L, 408L, 410L, 411L, 413L, 414L, 418L, 419L, 421L, 423L, 424L, 427L, 428L, 430L, 433L, 435L, 436L, 438L, 439L, 440L, 442L, 443L, 449L, 451L, 452L, 455L, 458L, 
459L, 460L, 462L, 464L, 465L, 467L, 469L, 471L, 472L, 477L, 478L, 480L, 482L, 484L, 488L, 489L, 491L, 492L, 494L, 495L, 496L, 498L, 500L, 501L, 503L, 506L, 508L, 509L, 510L, 511L, 515L, 518L, 520L, 
522L, 523L, 526L, 527L, 528L, 529L, 531L, 532L, 533L, 534L, 535L, 537L, 538L, 539L, 541L, 543L, 547L, 549L, 552L, 555L, 556L, 558L, 561L, 563L, 565L, 567L, 569L, 571L, 572L, 576L, 580L, 581L, 582L, 
584L, 585L, 586L, 588L, 591L, 592L, 593L, 595L, 597L, 598L, 601L, 604L, 608L, 609L, 613L, 614L, 616L, 618L, 620L, 621L, 624L, 625L, 628L, 629L, 630L, 632L, 636L, 637L, 638L, 640L, 642L, 644L, 645L, 
646L, 649L, 651L, 652L, 653L, 654L, 657L, 658L, 660L, 661L, 662L, 664L, 665L, 668L, 669L, 671L, 673L, 674L, 676L, 677L, 678L, 682L, 683L, 684L, 687L, 689L, 693L, 695L, 697L, 698L, 700L, 703L, 704L, 
706L, 707L, 708L, 712L, 716L, 718L, 721L, 723L, 724L, 728L, 729L, 731L, 733L, 736L, 739L, 740L, 741L, 743L, 744L, 747L, 749L, 751L, 752L, 753L, 754L, 756L, 758L, 759L, 760L, 764L, 767L, 768L, 770L, 
771L, 773L, 777L, 779L, 782L, 784L, 786L, 787L, 788L, 794L, 796L, 797L, 799L, 800L, 802L), c(1L, 3L, 6L, 14L, 15L, 20L, 23L, 24L, 26L, 28L, 31L, 32L, 34L, 35L, 37L, 41L, 45L, 51L, 52L, 55L, 58L, 
59L, 62L, 65L, 66L, 68L, 70L, 72L, 74L, 75L, 78L, 80L, 83L, 84L, 89L, 93L, 94L, 96L, 110L, 112L, 114L, 116L, 118L, 120L, 121L, 122L, 124L, 126L, 127L, 131L, 133L, 136L, 137L, 138L, 141L, 143L, 
147L, 151L, 152L, 154L, 156L, 157L, 159L, 161L, 164L, 165L, 171L, 172L, 175L, 177L, 179L, 185L, 190L, 194L, 196L, 198L, 199L, 200L, 202L, 204L, 208L, 212L, 217L, 219L, 220L, 222L, 226L, 227L, 230L, 
231L, 235L, 238L, 239L, 240L, 245L, 247L, 249L, 251L, 252L, 254L, 255L, 258L, 259L, 262L, 268L, 270L, 272L, 273L, 276L, 277L, 283L, 284L, 290L, 294L, 299L, 302L, 303L, 304L, 307L, 311L, 313L, 315L, 
317L, 320L, 322L, 325L, 326L, 330L, 331L, 333L, 336L, 337L, 340L, 342L, 348L, 350L, 353L, 356L, 357L, 360L, 363L, 365L, 366L, 367L, 368L, 370L, 374L, 375L, 378L, 380L, 383L, 384L, 388L, 390L, 395L, 
396L, 404L, 405L, 408L, 412L, 417L, 423L, 426L, 427L, 429L, 431L, 432L, 434L, 435L, 440L, 442L, 443L, 445L, 447L, 449L, 450L, 451L, 452L, 458L, 460L, 462L, 463L, 467L, 470L, 472L, 473L, 482L, 486L, 
487L, 496L, 497L, 498L, 500L, 501L, 505L, 506L, 509L, 512L, 514L, 517L, 518L, 521L, 523L, 524L, 526L, 527L, 530L, 531L, 533L, 534L, 535L, 536L, 541L, 543L, 549L, 553L, 554L, 556L, 558L, 561L, 563L, 
564L, 566L, 570L, 572L, 574L, 578L, 579L, 583L, 585L, 587L, 588L, 589L, 593L, 596L, 597L, 599L, 600L, 602L, 603L, 606L, 608L, 609L, 611L, 612L, 613L, 617L, 618L, 620L, 621L, 622L, 625L, 626L, 627L, 
634L, 635L, 636L, 639L, 640L, 643L, 645L, 646L, 648L, 649L, 650L, 651L, 654L, 657L, 658L, 659L, 661L, 663L, 665L, 667L, 671L, 674L, 676L, 679L, 685L, 688L, 689L, 690L, 693L, 697L, 699L, 700L, 704L, 
705L, 708L, 709L, 713L, 716L, 718L, 719L, 721L, 724L, 726L, 731L, 733L, 736L, 737L, 739L, 740L, 743L, 747L, 750L, 752L, 756L, 759L, 761L, 763L, 764L, 766L, 771L, 773L, 776L, 779L, 783L, 784L, 786L, 
788L, 789L, 791L, 795L, 798L, 802L, 803L, 804L, 806L, 808L, 809L, 813L, 815L, 816L, 817L, 819L, 820L, 822L, 823L, 827L, 828L, 832L, 835L, 840L, 842L, 847L, 853L, 855L, 858L, 859L, 860L, 865L, 866L, 
868L, 874L, 876L, 877L, 881L, 883L, 886L, 891L, 892L, 901L, 903L, 904L, 905L, 908L, 914L, 918L, 919L, 923L, 926L, 932L, 933L, 935L, 936L, 941L, 943L, 947L, 948L, 949L, 953L, 956L, 959L, 961L, 962L, 
971L, 973L, 974L, 976L, 978L, 980L, 985L, 988L, 991L, 992L, 994L, 995L, 998L, 1000L, 1001L, 1003L, 1008L, 1010L, 1013L, 1014L, 1017L, 1019L, 1024L, 1025L, 1027L, 1030L, 1033L, 1034L, 1035L, 
1039L, 1040L, 1042L, 1043L, 1049L, 1050L, 1055L, 1058L, 1060L, 1069L, 1074L, 1076L, 1080L, 1085L, 1087L, 1091L, 1097L, 1098L, 1099L, 1102L, 1105L, 1109L, 1113L, 1115L, 1117L, 1119L, 1121L, 
1122L, 1126L, 1127L, 1130L, 1131L, 1133L, 1135L, 1139L, 1141L, 1144L, 1145L, 1150L, 1152L, 1153L, 1155L, 1158L, 1161L, 1165L, 1167L, 1168L, 1171L, 1175L, 1177L, 1179L, 1181L, 1183L, 1186L, 
1191L, 1194L, 1198L, 1203L, 1205L, 1206L, 1210L, 1213L, 1215L, 1216L, 1217L, 1218L, 1219L, 1220L, 1223L, 1225L, 1228L, 1231L, 1233L, 1234L, 1237L, 1239L, 1242L, 1243L, 1246L, 1250L, 1252L, 
1255L, 1256L, 1259L, 1262L, 1266L, 1268L, 1270L, 1272L, 1275L, 1276L, 1278L, 1280L, 1282L, 1283L, 1284L, 1286L, 1288L, 1289L, 1293L, 1295L, 1297L, 1299L, 1300L, 1302L, 1303L, 1305L, 1307L, 
1308L, 1309L, 1310L, 1312L, 1316L, 1318L, 1319L, 1320L, 1325L, 1327L, 1329L, 1332L, 1333L, 1335L, 1341L, 1343L, 1344L, 1350L, 1356L, 1357L, 1359L, 1363L, 1364L, 1367L, 1368L, 1373L, 1374L, 
1377L, 1380L, 1382L, 1383L, 1386L, 1387L, 1388L, 1389L, 1392L, 1398L, 1405L, 1407L, 1410L, 1418L, 1419L, 1424L, 1427L, 1428L, 1430L, 1432L, 1435L, 1436L, 1438L, 1439L, 1441L, 1445L, 1449L, 
1455L, 1456L, 1459L, 1462L, 1463L, 1466L, 1469L, 1470L, 1472L, 1474L, 1476L, 1478L, 1479L, 1482L, 1484L, 1487L, 1488L, 1493L, 1497L, 1498L, 1500L, 1514L, 1516L, 1518L, 1520L, 1522L, 1524L, 
1525L, 1526L, 1528L, 1530L, 1531L, 1535L, 1537L, 1540L, 1541L, 1542L, 1545L, 1547L, 1551L, 1555L, 1556L, 1558L, 1560L, 1561L, 1563L, 1565L, 1568L, 1569L, 1575L, 1576L, 1579L, 1581L, 1583L, 
1589L, 1594L, 1598L, 1600L, 1602L, 1603L, 1604L, 1606L, 1608L, 1612L, 1616L, 1621L, 1623L, 1624L, 1626L, 1630L, 1631L, 1634L, 1635L, 1639L, 1642L, 1643L, 1644L, 1649L, 1651L, 1653L, 1655L, 
1656L, 1658L, 1659L, 1662L, 1663L, 1666L, 1672L, 1674L, 1676L, 1677L, 1680L, 1681L, 1687L, 1688L, 1694L, 1698L, 1703L, 1706L, 1707L, 1708L, 1711L, 1715L, 1717L, 1719L, 1721L, 1724L, 1726L, 
1729L, 1730L, 1734L, 1735L, 1737L, 1740L, 1741L, 1744L, 1746L, 1752L, 1754L, 1757L, 1760L, 1761L, 1764L, 1767L, 1769L, 1770L, 1771L, 1772L, 1774L, 1778L, 1779L, 1782L, 1784L, 1787L, 1788L, 
1792L, 1794L, 1799L, 1800L, 1808L, 1809L, 1812L, 1816L, 1821L, 1827L, 1830L, 1831L, 1833L, 1835L, 1836L, 1838L, 1839L, 1844L, 1846L, 1847L, 1849L, 1851L, 1853L, 1854L, 1855L, 1856L, 1862L, 
1864L, 1866L, 1867L, 1871L, 1874L, 1876L, 1877L, 1886L, 1890L, 1891L, 1900L, 1901L, 1902L, 1904L, 1905L, 1909L, 1910L, 1913L, 1916L, 1918L, 1921L, 1922L, 1925L, 1927L, 1928L, 1930L, 1931L, 
1934L, 1935L, 1937L, 1938L, 1939L, 1940L, 1945L, 1947L, 1953L, 1957L, 1958L, 1960L, 1962L, 1965L, 1967L, 1968L, 1970L, 1974L, 1976L, 1978L, 1982L, 1983L, 1987L, 1989L, 1991L, 1992L, 1993L, 
1997L, 2000L, 2001L, 2003L, 2004L, 2006L, 2007L, 2010L, 2012L, 2013L, 2015L, 2016L, 2017L, 2021L, 2022L, 2024L, 2025L, 2026L, 2029L, 2030L, 2031L, 2038L, 2039L, 2040L, 2043L, 2044L, 2047L, 
2049L, 2050L, 2052L, 2053L, 2054L, 2055L, 2058L, 2061L, 2062L, 2063L, 2065L, 2067L, 2069L, 2071L, 2075L, 2078L, 2080L, 2083L, 2089L, 2092L, 2093L, 2094L, 2097L, 2101L, 2103L, 2104L, 2108L, 
2109L, 2112L, 2113L, 2117L, 2120L, 2122L, 2123L, 2125L, 2128L, 2130L, 2135L, 2137L, 2140L, 2141L, 2143L, 2144L, 2147L, 2151L, 2154L, 2156L, 2160L, 2163L, 2165L, 2167L, 2168L, 2170L, 2175L, 
2177L, 2180L, 2183L, 2187L, 2188L, 2190L, 2192L, 2193L, 2195L, 2199L, 2202L, 2206L, 2207L, 2208L, 2210L, 2212L, 2213L, 2217L, 2219L, 2220L, 2221L, 2223L, 2224L, 2226L, 2227L, 2231L, 2232L, 
2236L, 2239L, 2244L, 2246L, 2251L, 2257L, 2259L, 2262L, 2263L, 2264L, 2269L, 2270L, 2272L, 2278L, 2280L, 2281L, 2285L, 2287L, 2290L, 2295L, 2296L, 2305L, 2307L, 2308L, 2309L, 2312L, 2318L, 
2322L, 2323L, 2327L, 2330L, 2336L, 2337L, 2339L, 2340L, 2345L, 2347L, 2351L, 2352L, 2353L, 2357L, 2360L, 2363L, 2365L, 2366L, 2375L, 2377L, 2378L, 2380L, 2382L, 2384L, 2389L, 2392L, 2395L, 
2396L, 2398L, 2399L, 2402L, 2404L, 2405L, 2407L, 2412L, 2414L, 2417L, 2418L, 2421L, 2423L, 2428L, 2429L, 2431L, 2434L, 2437L, 2438L, 2439L, 2443L, 2444L, 2446L, 2447L, 2453L, 2454L, 2459L, 
2462L, 2464L, 2473L, 2478L, 2480L, 2484L, 2489L, 2491L, 2495L, 2501L, 2502L, 2503L, 2506L, 2509L, 2513L, 2517L, 2519L, 2521L, 2523L, 2525L, 2526L, 2530L, 2531L, 2534L, 2535L, 2537L, 2539L, 
2543L, 2545L, 2548L, 2549L, 2554L, 2556L, 2557L, 2559L, 2562L, 2565L, 2569L, 2571L, 2572L, 2575L, 2579L, 2581L, 2583L, 2585L, 2587L, 2590L, 2595L, 2598L, 2602L, 2607L, 2609L, 2610L, 2614L, 
2617L, 2619L, 2620L, 2621L, 2622L, 2623L, 2624L, 2627L, 2629L, 2632L, 2635L, 2637L, 2638L, 2641L, 2643L, 2646L, 2647L, 2650L, 2654L, 2656L, 2659L, 2660L, 2663L, 2666L, 2670L, 2672L, 2674L, 
2676L, 2679L, 2680L, 2682L, 2684L, 2686L, 2687L, 2688L, 2690L, 2692L, 2693L, 2697L, 2699L, 2701L, 2703L, 2704L, 2706L, 2707L, 2709L, 2711L, 2712L, 2713L, 2714L, 2716L, 2720L, 2722L, 2723L, 
2724L, 2729L, 2731L, 2733L, 2736L, 2737L, 2739L, 2745L, 2747L, 2748L, 2754L, 2760L, 2761L, 2763L, 2767L, 2768L, 2771L, 2772L, 2777L, 2778L, 2781L, 2784L, 2786L, 2787L, 2790L, 2791L, 2792L, 
2793L, 2796L, 2802L), c(1L, 2L, 4L, 8L, 10L, 12L, 13L, 14L, 15L, 20L, 27L, 28L, 32L, 35L, 40L, 41L, 43L, 45L, 46L, 49L, 51L, 53L, 54L, 56L, 59L, 60L, 63L, 65L, 68L, 69L, 74L, 75L, 78L, 80L, 82L, 
84L, 90L, 95L, 98L, 99L, 101L, 103L, 105L, 107L, 110L, 112L, 113L, 115L, 118L, 120L, 121L, 123L, 128L, 130L, 135L, 137L, 139L, 149L, 151L, 153L, 155L, 158L, 162L, 163L, 166L, 167L, 172L, 174L, 
175L, 177L, 178L, 181L, 182L, 183L, 184L, 185L, 188L, 191L, 195L, 197L, 199L, 200L, 201L, 203L, 207L, 211L, 213L, 216L, 219L, 224L, 228L, 229L, 230L, 231L, 234L, 237L, 244L, 246L, 247L, 249L, 253L, 
256L, 257L, 259L, 262L, 264L, 265L, 268L, 269L, 274L, 275L, 277L, 279L, 280L, 281L, 284L, 290L, 292L, 294L, 296L, 297L, 299L, 301L, 302L, 314L, 319L, 322L, 323L, 325L, 329L, 331L, 333L, 334L, 335L, 
336L, 338L, 339L, 340L, 342L, 344L, 346L, 356L, 357L, 359L, 360L, 362L, 363L, 366L, 369L, 370L, 372L, 373L, 374L, 375L, 379L, 382L, 385L, 386L, 387L, 389L, 392L, 395L, 396L, 398L, 399L, 401L, 402L, 
407L, 408L, 412L, 415L, 416L, 418L, 420L, 421L, 425L, 426L, 428L, 431L, 433L, 435L, 437L, 438L, 440L, 445L, 447L, 449L, 450L, 451L, 458L, 459L, 460L, 463L, 464L, 466L, 467L, 470L, 476L, 477L, 481L, 
482L, 484L, 485L, 487L, 489L, 490L, 493L, 497L, 500L, 501L, 502L, 504L, 505L, 506L, 508L, 512L, 513L, 516L, 519L, 521L, 524L, 530L, 532L, 534L, 536L, 538L, 539L, 545L, 547L, 549L, 550L, 551L, 553L, 
554L, 558L, 559L, 561L, 565L, 567L, 569L, 572L, 573L, 576L, 578L, 579L, 581L, 583L, 585L, 587L, 588L, 590L, 591L, 592L, 594L, 596L, 597L, 601L, 602L, 604L, 605L, 607L, 611L, 616L, 617L, 618L, 619L, 
620L, 622L, 626L, 628L, 630L, 633L, 634L, 635L, 637L, 642L, 643L, 644L, 647L, 649L, 650L, 653L, 654L, 655L, 656L, 659L, 660L, 661L, 662L, 664L, 665L, 668L, 670L, 671L, 673L, 674L, 676L, 678L, 679L, 
681L, 683L, 684L, 687L, 689L, 691L, 692L, 695L, 699L, 715L, 719L, 721L, 724L, 727L, 729L, 730L, 736L, 737L, 738L, 741L), c(5L, 7L, 8L, 11L, 13L, 15L, 17L, 21L, 27L, 28L, 31L, 33L, 35L, 39L, 
40L, 41L, 43L, 46L, 48L, 50L, 52L, 53L, 57L, 59L, 60L, 62L, 64L, 65L, 67L, 71L, 73L, 75L, 76L, 78L, 80L, 83L, 85L, 86L, 88L, 90L, 92L, 95L, 97L, 101L, 103L, 104L, 106L, 107L, 109L, 110L, 116L, 
117L, 119L, 120L, 122L, 124L, 125L, 126L, 128L, 131L, 132L, 134L, 136L, 138L, 142L, 143L, 145L, 150L, 154L, 156L, 158L, 159L, 160L, 163L, 165L, 166L, 171L, 173L, 175L, 177L, 179L, 180L, 184L, 186L, 
187L, 188L, 190L, 191L, 193L, 195L, 198L, 200L, 202L, 204L, 208L, 210L, 212L, 215L, 216L, 217L, 220L, 221L, 223L, 226L, 227L, 231L, 234L, 235L, 236L, 239L, 240L, 241L, 242L, 244L, 246L, 251L, 252L, 
253L, 256L, 257L, 261L, 263L, 265L, 266L, 268L, 271L, 272L, 273L, 275L, 277L, 283L, 285L, 295L, 300L, 303L, 304L, 305L, 308L, 313L, 314L, 316L, 318L, 319L, 322L, 324L, 326L, 328L, 329L, 332L, 334L, 
335L, 337L, 338L, 339L, 342L, 343L, 344L, 346L, 348L, 349L, 351L, 352L, 353L, 354L, 355L, 356L, 358L, 359L, 362L, 365L, 366L, 368L, 370L, 371L, 372L, 373L, 375L, 377L, 379L, 380L, 381L, 382L, 387L, 
389L, 391L, 394L, 396L, 397L, 399L, 401L, 403L, 405L, 407L, 408L, 410L, 411L, 412L, 414L, 416L, 418L, 420L, 421L, 422L, 424L, 426L, 427L, 428L, 433L, 434L, 436L, 438L, 439L, 441L, 442L, 444L, 447L, 
451L, 453L, 456L, 457L, 458L, 460L, 462L, 463L, 465L, 467L, 468L, 470L, 471L, 473L, 474L, 476L, 478L, 479L, 480L, 482L, 484L, 486L, 488L, 492L, 493L, 495L, 496L, 498L, 500L, 502L, 503L, 505L, 506L, 
509L, 511L, 512L, 515L, 516L, 517L, 519L, 521L, 523L, 524L, 525L, 526L, 528L, 529L, 531L, 534L, 537L, 538L, 539L, 540L, 541L, 543L, 545L, 547L, 549L, 550L, 552L, 554L, 556L, 557L, 558L, 561L, 562L, 
564L, 566L, 567L, 571L, 572L, 575L, 576L, 577L, 579L, 581L, 582L, 585L, 586L, 593L, 594L, 595L, 597L, 599L, 601L, 603L, 605L, 607L, 608L, 609L, 611L, 613L, 615L, 617L, 619L, 621L, 622L, 623L, 626L, 
627L, 629L, 631L, 632L, 635L, 636L, 638L, 640L, 641L, 643L, 644L, 646L, 651L, 652L, 654L, 655L, 656L, 659L, 660L, 662L, 664L, 666L, 671L, 672L, 674L, 675L, 677L, 680L, 682L, 683L, 686L, 687L, 688L, 
689L, 691L, 693L, 694L, 696L, 698L, 699L, 701L, 703L, 705L, 707L, 708L, 710L, 712L, 713L, 717L, 720L, 726L, 727L, 738L, 740L, 742L, 743L, 744L, 749L, 752L, 754L, 755L, 761L, 762L, 763L, 766L)), 
stops_count = list(412L, 1120L, 319L, 380L), 
count = list(3L, 2L, 1L, 1L), 
allYES = list(c(133L, 550L, 560L), c(12L, 1416L), 144L, 248L), 
test = c(0, 0, 0, 0), 
.Names = c("rowname", "id", "Datetime", "content", "word_count", "split", "allNOT", "allSTOPS", "stops_count", "count", allYES", "test"), 
row.names = c(NA, -4L), 
groups = structure(list(id = (1", "2", "3", "4"), 
.rows = list(1L, 2L, 3L, 4L)), 
.Names = c("id", ".rows"), 
row.names = c(NA, -4L), 
class = c("tbl_df", "tbl", data.frame"), .drop = TRUE), 
class = c("grouped_df", "tbl_df", "tbl", "data.frame"))

the allYES & allNOT & allSTOPS columns are lists of positions of words that I search for in the documents. I am trying to use these positions to find various patterns based on distance (e.g. negations). The code below, does this and works fine, however, it does not scale and is extremely slow (the dreaded for loop!)

for(i in 1:nrow(temp_good)) {
    if(nrow(temp_good)==0) { next}
    a <- temp_good$allYES[[i]]
    b <- temp_good$allNOT[[i]]
    c <- abs(unlist(lapply(a, function(k) k - b)))
    d <- length(c[c>0&c<3])
    if(d>0) {
        temp_good$test[i] <- d
    } 
}

Now I am aware that the ifelse construct in R is vectorized but I can't get it to work. I tried a few different varieties and I am sure I am doing something silly somewhere, but I honestly can't see it. The code below generates the variable "c" from above

temp_good$newCol <- ifelse(nrow(temp_good)==0, {next}, abs(unlist(lapply(unlist(temp_good$allYES), function(k) k - unlist(temp_good$allNOT)))))

but returns the first value in the first iteration of the loop. If I run the inner calculation:

abs(unlist(lapply(unlist(temp_good$allYES[1]), function(k) k - unlist(temp_good$allNOT[1]))))

The result is correct, but I can't seem to coerce it into a dataframe element using the ifelse construct.

 110  87  79  71  40  29  20 144 254 268 278 306 359 393 520 616 527 504 496 488 457 446 437 273 163 149 139 111  58  24 103 199 537 514 506 498 467 456 447 283 173 159 149 121  68 34  93 189

Now this doesn't solve the entire problem but the I can't even get past this first step.

I also tried to wrap this code in a function:

calculateDiff <- function(x){ abs(unlist(lapply(unlist(temp_good$allYES), function(k) k - unlist(temp_good$allNOT))))}

and use lapply again to create the new column, but no luck.

Any advice and guidance on this is much appreciated.

Aucun commentaire:

Enregistrer un commentaire