I'm trying to scrape the job title
and required skills
of different jobs from a webpage. As I'm not an expert with Selenium, I can't figure out how to scrape content from the inner pages and then click through to the next pages cyclically. Currently, the logic for clicking the next page is commented out inside the "get_links" function.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Indeed search-results URL for "developer" jobs. The query string carries a
# pagination offset (start=640) and opaque tracking parameters (pp=..., vjk=...)
# copied verbatim from the browser; only the page reached matters here.
link = 'https://www.indeed.com/jobs?q=developer&sc=0kf%3Aattr%28DSQF7%29%3B&start=640&pp=gQPAAAABhR6C4g8AAAAB8f6BVABIAQEBBg-PHLEDms2oSIodfSmVxw09STnASEoBTK5mKYOEa4i4O_Ur1l0A-QxgzLqNt1E6GP8A47DqWEqCMSpmIabUq7qaIzRCAAA&vjk=8008aba345c406ba'
def get_links(driver, link, max_pages=None):
    """Collect job-posting URLs from an Indeed results page, following pagination.

    The original version returned after scraping only the first page, leaving the
    next-page click commented out (and unreachable, after the ``return``). This
    version loops: scrape the current page, click "Next Page", repeat until no
    next button exists (or ``max_pages`` pages have been scraped).

    Parameters:
        driver: a live selenium WebDriver instance.
        link: URL of the first results page to open.
        max_pages: optional cap on the number of result pages to visit;
            ``None`` (the default) means follow pagination until it ends.

    Returns:
        list[str]: href of every matched job-title link, in page order.
    """
    driver.get(link)
    link_list = []
    pages_seen = 0
    while True:
        # Wait for the job cards of the current page, then pull each title href.
        cards = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".job_seen_beacon"))
        )
        for card in cards:
            title_link = card.find_element(
                By.CSS_SELECTOR, "h2 > a[class^='jcs-JobTitle']"
            ).get_attribute("href")
            link_list.append(title_link)
        pages_seen += 1
        if max_pages is not None and pages_seen >= max_pages:
            break
        try:
            next_page = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "a[aria-label='Next Page']"))
            )
            # JS click sidesteps overlays that intercept a native .click().
            driver.execute_script("arguments[0].click();", next_page)
        except Exception:
            # No next-page button within the timeout: assume last page reached.
            break
    return link_list
def get_content(link):
    """Open one job-posting page and return ``(title, skill)``.

    NOTE(review): relies on a module-global ``driver`` (bound by the ``with``
    block under ``__main__``) rather than taking it as a parameter — kept as-is
    so the existing call sites still work.

    Parameters:
        link: URL of the job-posting page.

    Returns:
        tuple[str, str]: the job title and the text of the "Required Skills"
        section, or ``""`` for the skills when that section is absent.
    """
    driver.get(link)
    title = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "h1.jobsearch-JobInfoHeader-title"))
    ).text
    try:
        skill = driver.find_element(
            By.XPATH,
            "//*[@id='jobDescriptionText']//div[./div/b[contains(.,'Required Skills')]]",
        ).get_attribute("textContent")
    except NoSuchElementException:
        # Not every posting has a "Required Skills" block — best-effort default.
        # (Narrowed from a bare `except Exception`, which also hid real errors.)
        skill = ""
    return title, skill
if __name__ == '__main__':
    # The context manager quits the browser even if scraping raises.
    with webdriver.Chrome() as driver:
        # Gather every posting URL first, then visit each inner page —
        # navigating away invalidates the result-page elements otherwise.
        job_links = get_links(driver, link)
        for job_link in job_links:
            print(get_content(job_link))
From: "Can't click on the next page after scraping content from inner pages using Selenium"
No comments:
Post a Comment