Tuesday, 20 December 2022

Can't click on the next page after scraping content from inner pages using selenium

I'm trying to scrape the job title and required skills of different jobs from a webpage. As I'm not an expert in Selenium, I can't figure out how to scrape content from the inner pages and then click through to the next page, repeating that cycle for every page of results. Currently, the logic for clicking on the next page is commented out within the "get_links" function.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Start URL of the job-search results page: a query for "developer" with
# start=640. Adjacent string literals are concatenated by Python, so the
# value is a single URL; each piece is one component of the query string.
link = (
    'https://www.indeed.com/jobs'
    '?q=developer'
    '&sc=0kf%3Aattr%28DSQF7%29%3B'
    '&start=640'
    '&pp=gQPAAAABhR6C4g8AAAAB8f6BVABIAQEBBg-PHLEDms2oSIodfSmVxw09STnASEoBTK5mKYOEa4i4O_Ur1l0A-QxgzLqNt1E6GP8A47DqWEqCMSpmIabUq7qaIzRCAAA'
    '&vjk=8008aba345c406ba'
)

def get_links(driver,link):
    """Load a search-results page and collect the job-detail URLs on it.

    Args:
        driver: Selenium WebDriver used to load the page.
        link: URL of the search-results page to open.

    Returns:
        A list with the ``href`` of the title anchor of every
        ``.job_seen_beacon`` element found on the page, in page order.
    """
    driver.get(link)

    # Wait up to 20 seconds for the job cards to be present in the DOM.
    job_cards = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".job_seen_beacon"))
    )

    # Only this one page is processed; nothing here advances to the next page.
    return [
        card.find_element(
            By.CSS_SELECTOR, "h2 > a[class^='jcs-JobTitle']"
        ).get_attribute("href")
        for card in job_cards
    ]

    # try:
    #     next_page = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.CSS_SELECTOR,"a[aria-label='Next Page']")))
    #     driver.execute_script("arguments[0].click();",next_page)
    # except Exception as err:
    #     break


def get_content(link):
    """Open a job-detail page and return its title and required-skills text.

    This function does not take the browser as a parameter: it uses the
    module-level name ``driver``, so that name must be bound to a live
    WebDriver session before the function is called.

    Args:
        link: URL of the job-detail page to open.

    Returns:
        A ``(title, skill)`` tuple. ``title`` is the text of the
        ``h1.jobsearch-JobInfoHeader-title`` heading. ``skill`` is the
        ``textContent`` of the first block in ``#jobDescriptionText`` whose
        bold label contains "Required Skills", or ``""`` when the page has
        no such block.

    Raises:
        selenium.common.exceptions.TimeoutException: if the title heading
            is not present within 20 seconds.
    """
    driver.get(link)
    title = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, "h1.jobsearch-JobInfoHeader-title")
        )
    ).text

    # find_elements returns an empty list when nothing matches, so the
    # expected "no Required Skills section" case needs no exception handling
    # and genuine driver failures are no longer silently turned into "".
    skill_blocks = driver.find_elements(
        By.XPATH,
        "//*[@id='jobDescriptionText']//div[./div/b[contains(.,'Required Skills')]]",
    )
    skill = skill_blocks[0].get_attribute("textContent") if skill_blocks else ""
    return title, skill


if __name__ == "__main__":
    # The session has to be bound to the name `driver`: get_content() reads
    # it as a module-level global instead of receiving it as an argument.
    # The `with` block closes the browser when the loop finishes or fails.
    with webdriver.Chrome() as driver:
        # Collect every job link from the results page first, then visit each
        # detail page in turn and print its (title, skill) tuple.
        for job_url in get_links(driver, link):
            print(get_content(job_url))


from Can't click on the next page after scraping content from inner pages using selenium

No comments:

Post a Comment