I am working on a script that will scrape a page and find the names of adoptable dogs. I am able to scrape and append the names to a list. But, I am having trouble having the code run continuously and append new names to a new list and remove them from the old list. Wondering if someone could help me work through this.
import requests
from bs4 import BeautifulSoup
import re
import time
from twilio.rest import Client
url = 'http://ift.tt/2vsMlXn'
response = requests.get(url)
html = response.content
account_sid = ("XXXXXXXXXXXXXXXXXXXXXXXXXXX")
auth_token = ("XXXXXXXXXXXXXXXXXXXXXXXXXXXX")
client = Client(account_sid, auth_token)
soup = BeautifulSoup(html, 'html.parser')
names = soup.find_all(text=re.compile("My name is(.*)"))
def check():
old = []
new = []
newest = []
for name in names:
name = name.title()
if name not in old:
old.append(name[11:-2])
if name in old:
continue
for name in names:
name = name.title()
if name in old:
continue
if name not in new and name not in old:
new.append(name[11:-2])
if name not in new and name in old:
new.append(name[11:-2])
old.remove(name)
if name in new and name in old:
old.remove(name)
new.remove(name)
for name in names:
name = name.title()
if name in old or name in new:
continue
if name not in old and name not in new:
newest.append(name[11:-2])
num_old = len(old)
num_new = len(new)
num_newest = len(newest)
print("Old List: " + str(old))
print("Number of dogs in the old list: " + str(num_old))
print("New List: " + str(new))
print("Number of new dogs: " + str(num_new))
print("Newest List: " + str(newest))
print("Number of newest dogs: " + str(num_newest))
#client.api.account.messages.create(to = "+XXXXXXXXXX",
#from_= "+XXXXXXXXXX",
#body = "Here are some new dogs:" + str(new))
#client.api.account.messages.create(to="+XXXXXXXXXX",
#from_="+XXXXXXXXXX",
#body=("There are " + str(num_newest) + " new puppies"), media_url = 'http://ift.tt/2vsMlXn')
#client.api.account.messages.create(to = "+XXXXXXXXXX",
#from_= "+XXXXXXXXXX",
#body = "Here are some new names:" + str(newest))
while True:
check()
time.sleep(20)
Aucun commentaire:
Enregistrer un commentaire