I do have a website which i query it with my pattern after solving CAPTCHA and if that pattern found, then it's downloading a PDF file but my issue i couldn't force FireFox to download the file automatically to the current working directory without user interaction.
also on the other side how i can control if there's file found and downloaded or not.
Something like if the pattern is invalid then we have no file so i want to display that there's no file found for that pattern.
Here's my current working code which do everything till the file download menu popup:
import argparse
import logging
from time import sleep
import os
import requests
from selenium import webdriver
from selenium.common import exceptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options
logger = logging.getLogger('tst-log-query')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)-5.5s - %(message)s', "%Y-%m-%d %H:%M:%S")
file_handler = logging.FileHandler(
'tst-log-query.log', 'w', encoding='utf-8')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
mainurl = "https://cndt-certidao.tst.jus.br/inicio.faces"
ckey = "f1a382ddd51949057324a7fc7c8ccf8a"
def solver(captcha):
with requests.Session() as req:
print("[*] - Please wait while CAPTCHA is solved ")
cdata1 = {
"clientKey": ckey,
"task": {
"type": "ImageToTextTask",
"body": captcha
}
}
cdata2 = {
"clientKey": ckey
}
while True:
try:
r = req.post(
'https://api.anti-captcha.com/createTask', json=cdata1)
cdata2['taskId'] = r.json()['taskId']
break
except KeyError:
logger.debug(r.json()["errorDescription"])
continue
while True:
sleep(5)
logger.info("Slept 5 Seconds!")
fr = req.post(
'https://api.anti-captcha.com/getTaskResult', json=cdata2)
status = fr.json()
logger.debug("Status: {}".format(status["status"]))
if status['status'] == "ready":
cap = status['solution']['text']
print("[*] - CAPTCHA Solved!")
return cap
else:
continue
def main(pat):
# saving to current working directory
options = Options()
options.set_preference('browser.download.folderList', 2)
options.set_preference('browser.download.manager.showWhenStarting', False)
options.set_preference('browser.download.dir', os.getcwd())
options.set_preference(
'browser.helperApps.neverAsk.saveToDisk', 'pdf')
#__________________________#
driver = webdriver.Firefox(options=options)
print(f"Checking (CNPJ/CPF)# {pat}")
while True:
try:
driver.get(mainurl)
waiter = WebDriverWait(driver, 60)
waiter.until(
EC.element_to_be_clickable(
(By.CSS_SELECTOR, "input[value=Regularização]"))
).click()
waiter.until(
EC.presence_of_element_located(
(By.CSS_SELECTOR, "#consultarRegularizacaoForm\:cpfCnpj"))
).send_keys(pat)
cap = waiter.until(
EC.presence_of_element_located(
(By.CSS_SELECTOR, "img[src^=data]"))).get_attribute('src').split(',', 1)[1]
break
except exceptions.TimeoutException:
logger.error('[*] - Unable to found elements, Refreshing Request.')
continue
capso = solver(cap)
if capso:
driver.find_element(By.ID, 'idCaptcha').send_keys(capso)
driver.find_element(
By.ID, 'consultarRegularizacaoForm:btnEmitirCertidao').click()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Download PDF File!')
parser.add_argument(
'pattern', metavar="(CNPJ/CPF) Number", help="(CNPJ/CPF) Number", type=str)
try:
main(parser.parse_args().pattern)
except KeyboardInterrupt:
exit("Good Bye!")
Usage: python script.py 15436940000103
from Selenium save file directlry to current working directly
No comments:
Post a Comment