kwdArr = [
u'"Sin Tax"' -> might appear here
]
tryKWDS = [
u'"Sin Tax" "Cigarette"',
u'"Sin Tax" "Cigarettes"',
u'"Sin Tax" "E-cigarette"',
u'"Sin Tax" "E-cigarettes"',
u'"Sin Tax" "Heated Tobacco Product"',
u'"Sin Tax" "Heated Tobacco Products"',
u'"Sin Tax"' -> might appear here
]
How to prevent appending other sin tax with keywords in my kwdArr, since a single word of ("Sin Tax") appear in the array ? and vice-versa, all of "Sin Tax" with other words should append if there's no single word ("Sin Tax") in two arrays?
def removeDuplicates(seen_words):
tryDuplicateArr = []
for x in kwdArr:
tryDuplicateArr.append(x.replace('"', '').strip('\"'))
for words in tryDuplicateArr:
words = words.replace('"', '').strip('\"')
seen_words |= set(words.split())
def removeDupliWords(wordsArr):
seen_words = set()
for words in wordsArr:
rawWords = words
removeDuplicates(seen_words)
words = words.replace('"', '').strip('\"')
print(words)
seen = False
for word in words.split():
if word in seen_words:
seen = True
continue
print(seen)
if not seen:
checkRawWords = rawWords.replace('"', '').strip('\"')
result = checkRawWords.endswith("Corp")
if result == True:
rawWords = re.sub('Corp$', 'Corporation', checkRawWords)
rawWords = '"{}"'.format(rawWords)
kwdArr.append(u'{}'.format(rawWords))
tryKWDS.sort(key=lambda x: len(x.split()))
removeDupliWords(tryKWDS)
def removeDuplicates(seen_words):
tryDuplicateArr = []
for x in kwdArr:
tryDuplicateArr.append(x.replace('"', '').strip('\"'))
for words in tryDuplicateArr:
words = words.replace('"', '').strip('\"')
seen_words |= set(words.split())
def removeDupliWords(wordsArr):
seen_words = set()
for words in wordsArr:
rawWords = words
removeDuplicates(seen_words)
words = words.replace('"', '').strip('\"')
print(words)
seen = False
for word in words.split():
if word in seen_words:
seen = True
continue
print(seen)
if not seen:
checkRawWords = rawWords.replace('"', '').strip('\"')
result = checkRawWords.endswith("Corp")
if result == True:
rawWords = re.sub('Corp$', 'Corporation', checkRawWords)
rawWords = '"{}"'.format(rawWords)
kwdArr.append(u'{}'.format(rawWords))
tryKWDS.sort(key=lambda x: len(x.split()))
removeDupliWords(tryKWDS)
print(kwdArr)
This is my code so far, however the First ("sin tax" "cigarette") was appended if Sin Tax is not present in tryKWDS & and kwdArr but this is working if a single word of ("Sin Tax") is present?
Aucun commentaire:
Enregistrer un commentaire