require 'open-uri'
require 'nokogiri'
require 'byebug'
def fetch_recipe_urls
base_url = 'https://cooking.nytimes.com'
easy_recipe_url = 'https://cooking.nytimes.com/search?q=easy'
easy_searchpage = Nokogiri::HTML(open(easy_recipe_url))
recipes = easy_searchpage.search('//article[@class="card recipe-card"]/@data-url')
links_to_recipes = easy_searchpage.search('//article[@class="card collection-card"]/@data-url')
all_recipes = easy_searchpage.search('//section[@class="recipe-card-list track-card-params"]')
all_recipes.each do |recipe|
if recipe == easy_searchpage.search('//article[@class="card collection-card"]/@data-url')
link_to_recipes_url_array = links_to_recipes.map do |recipe|
uri = URI.parse(recipe.text)
uri.scheme = "http"
uri.host = "cooking.nytimes.com"
uri.query = nil
uri.to_s
end
acutal_pages_of_recipes = links_to_recipes_url_array.map do |recipe|
uri = URI.parse(recipe.text)
uri.scheme = "http"
uri.host = "cooking.nytimes.com"
uri.query = nil
uri.to_s
end
else
recipes_url_array = recipes.map do |recipe|
uri = URI.parse(recipe.text)
uri.scheme = "http"
uri.host = "cooking.nytimes.com"
uri.query = nil
uri.to_s
end
end
end
end
I want to check whether each HTML card (an `article` element) is a `card collection-card` or a `card recipe-card`. A `card recipe-card` needs one less round of iteration because it contains only a single link. A `card collection-card`, on the other hand, contains many links and therefore needs two rounds of iteration.
Here is the link, for anyone who wants to inspect the HTML page: https://cooking.nytimes.com/search?q=easy
Aucun commentaire:
Enregistrer un commentaire