I've written a script that uses Selenium with a thread pool (concurrent.futures), based on the idea in this answer. The script works fine and I can see all the results in the console. However, when the execution is done, the IDE never shows the usual indication at the bottom that the process has finished.
The following screenshots were taken in Python's default IDE (IDLE) and Sublime Text.
import atexit
import concurrent.futures
import sys
import threading

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Per-thread storage: each thread that calls create_browser() keeps its own
# WebDriver here so a single browser is never shared between threads.
threadLocal = threading.local()


def create_browser():
    """Return the calling thread's headless Chrome driver, creating it on first use.

    The driver is cached on ``threadLocal`` so repeated calls from the same
    thread reuse one browser. Every newly created driver has its ``quit``
    method registered with ``atexit`` so the browser and chromedriver
    processes are shut down when the interpreter exits instead of being
    left running after the script has finished.

    Returns:
        The ``webdriver.Chrome`` instance bound to the current thread.
    """
    driver = getattr(threadLocal, 'driver', None)
    if driver is None:
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        driver = webdriver.Chrome(options=options)
        # Without an explicit quit() the spawned chromedriver/Chrome
        # processes outlive the script. Registering per driver keeps the
        # handlers independent: a failure in one quit() does not stop the
        # others from running.
        atexit.register(driver.quit)
        threadLocal.driver = driver
    return driver
def get_links(link):
    """Yield the ``href`` of every question link found on the listing page *link*.

    This is a generator: the page is loaded in the calling thread's browser
    only once iteration starts. It waits up to 10 seconds for the matching
    elements to be present.
    """
    browser = create_browser()
    browser.get(link)
    locator = (By.CSS_SELECTOR, ".summary .question-hyperlink")
    anchors = WebDriverWait(browser, 10).until(
        EC.presence_of_all_elements_located(locator)
    )
    for anchor in anchors:
        yield anchor.get_attribute("href")
def get_title(url):
    """Load *url* in the calling thread's browser and return the question title text.

    Waits up to 10 seconds for the title link inside the ``h1`` heading to
    be present before reading its text.
    """
    browser = create_browser()
    browser.get(url)
    locator = (By.CSS_SELECTOR, "h1[itemprop='name'] > a.question-hyperlink")
    heading_link = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located(locator)
    )
    return heading_link.text
if __name__ == '__main__':
    # Script entry point: collect the question links from one listing page,
    # then fetch each question's title concurrently on a pool of 5 threads.
    URL = "https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=1&pagesize=50"
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # Map every future back to the URL it is fetching so that a failure
        # can be attributed to a specific page.
        future_to_url = {executor.submit(get_title, link): link for link in get_links(URL)}
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                title = future.result()
            except Exception as exc:
                # Top-level boundary: result() re-raises whatever the worker
                # raised. Report the failing URL and continue, so one bad
                # page does not discard the titles of all the others.
                print("{}: {!r}".format(future_to_url[future], exc), file=sys.stderr)
            else:
                print(title)
How can I terminate the process when the execution is accomplished?
from Can't let a script accomplish it's task in a conventional manner
No comments:
Post a Comment