Tuesday, 18 January 2022

Selenium save file directlry to current working directly

I do have a website which i query it with my pattern after solving CAPTCHA and if that pattern found, then it's downloading a PDF file but my issue i couldn't force FireFox to download the file automatically to the current working directory without user interaction.

also on the other side how i can control if there's file found and downloaded or not.

Something like if the pattern is invalid then we have no file so i want to display that there's no file found for that pattern.

Here's my current working code which do everything till the file download menu popup:

import argparse
import logging
from time import sleep
import os

import requests
from selenium import webdriver
from selenium.common import exceptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options

logger = logging.getLogger('tst-log-query')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)-5.5s - %(message)s', "%Y-%m-%d %H:%M:%S")
file_handler = logging.FileHandler(
    'tst-log-query.log', 'w', encoding='utf-8')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

mainurl = "https://cndt-certidao.tst.jus.br/inicio.faces"
ckey = "f1a382ddd51949057324a7fc7c8ccf8a"


def solver(captcha):
    with requests.Session() as req:
        print("[*] - Please wait while CAPTCHA is solved ")
        cdata1 = {
            "clientKey": ckey,
            "task": {
                "type": "ImageToTextTask",
                "body": captcha
            }
        }
        cdata2 = {
            "clientKey": ckey
        }
        while True:
            try:
                r = req.post(
                    'https://api.anti-captcha.com/createTask', json=cdata1)
                cdata2['taskId'] = r.json()['taskId']
                break
            except KeyError:
                logger.debug(r.json()["errorDescription"])
                continue

        while True:
            sleep(5)
            logger.info("Slept 5 Seconds!")
            fr = req.post(
                'https://api.anti-captcha.com/getTaskResult', json=cdata2)
            status = fr.json()
            logger.debug("Status: {}".format(status["status"]))
            if status['status'] == "ready":
                cap = status['solution']['text']
                print("[*] - CAPTCHA Solved!")
                return cap
            else:
                continue


def main(pat):

    # saving to current working directory
    options = Options()
    options.set_preference('browser.download.folderList', 2)
    options.set_preference('browser.download.manager.showWhenStarting', False)
    options.set_preference('browser.download.dir', os.getcwd())
    options.set_preference(
        'browser.helperApps.neverAsk.saveToDisk', 'pdf')
    #__________________________#

    driver = webdriver.Firefox(options=options)
    print(f"Checking (CNPJ/CPF)# {pat}")
    while True:
        try:
            driver.get(mainurl)
            waiter = WebDriverWait(driver, 60)
            waiter.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "input[value=Regularização]"))
            ).click()
            waiter.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "#consultarRegularizacaoForm\:cpfCnpj"))
            ).send_keys(pat)

            cap = waiter.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "img[src^=data]"))).get_attribute('src').split(',', 1)[1]
            break
        except exceptions.TimeoutException:
            logger.error('[*] - Unable to found elements, Refreshing Request.')
            continue
    capso = solver(cap)
    if capso:
        driver.find_element(By.ID, 'idCaptcha').send_keys(capso)
        driver.find_element(
            By.ID, 'consultarRegularizacaoForm:btnEmitirCertidao').click()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Download PDF File!')
    parser.add_argument(
        'pattern', metavar="(CNPJ/CPF) Number", help="(CNPJ/CPF) Number", type=str)
    try:
        main(parser.parse_args().pattern)
    except KeyboardInterrupt:
        exit("Good Bye!")

Usage: python script.py 15436940000103



from Selenium save file directlry to current working directly

No comments:

Post a Comment