I'm trying to create a script to parse different part numbers from a webpage using requests. If you open this link and click on the Product list tab, you will see the part numbers. This image shows where the part numbers are located.
I've tried with:
import requests
# Replay the catalogue's POST request with a requests session and print the
# decoded JSON reply.
link = 'https://www.festo.com/cat/en-id_id/products_ADNH'
post_url = 'https://www.festo.com/cfp/camosHTML5Client/cH5C/HRQ'
# NOTE(review): these values look like they were copied from a live browser
# session (ReqID, focus, events); presumably they are session-specific, so the
# server may still answer with a redirect to its exception page - confirm.
payload = {"q":4,"ReqID":21,"focus":"f24~v472_0","scroll":[],"events":["e468~12~0~472~0~4","e468_0~6~472"],"ito":22,"kms":4}

with requests.Session() as s:
    s.headers['user-agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
    s.headers['referer'] = 'https://www.festo.com/cfp/camosHTML5Client/cH5C/go?q=2'
    s.headers['content-type'] = 'application/json; charset=UTF-8'
    # Use json= so the dict is serialised as a JSON body, matching the
    # content type declared above; data= would form-encode the dict instead.
    r = s.post(post_url, json=payload)
    print(r.json())
When I execute the above script, I get the following result:
{'isRedirect': True, 'url': '../../camosStatic/Exception.html'}
How can I fetch the part numbers from that site using requests?
With Selenium, I tried the script below to fetch the part numbers, but it can't click on the Product list tab if I remove the hardcoded delay. I don't want to rely on any hardcoded delay in the script.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Open the catalogue page in Chrome, accept the cookie prompt, click the tab
# element and print the text of the matched table elements.
link = 'https://www.festo.com/cat/en-id_id/products_ADNH'

with webdriver.Chrome() as driver:
    driver.get(link)
    waiter = WebDriverWait(driver, 15)

    # The cookie prompt is reached by switching into the <object> element.
    cookie_frame = waiter.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "object"))
    )
    waiter.until(EC.frame_to_be_available_and_switch_to_it(cookie_frame))
    accept_button = waiter.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, "#btn-group-cookie > input[value='Accept all cookies']")
        )
    )
    accept_button.click()

    # Return to the top-level document before entering the second frame.
    driver.switch_to.default_content()
    camos_frame = waiter.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "iframe#CamosIFId"))
    )
    waiter.until(EC.frame_to_be_available_and_switch_to_it(camos_frame))

    time.sleep(10)  # I would like to get rid of this hardcoded delay

    tab = waiter.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "[id='r17'] > [id='f24']"))
    )
    # Click through JavaScript rather than WebElement.click().
    driver.execute_script("arguments[0].click();", tab)

    cells = waiter.until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, "[data-ctcwgtname='tabTable'] [id^='v471_']")
        )
    )
    # The first matched element is skipped (presumably a header - confirm).
    for cell in cells[1:]:
        print(cell.text)
from Can't find the right way to grab part numbers from a webpage using requests
No comments:
Post a Comment