I've looked at and tried nearly every other post on this topic with no luck.
EC2
I'm using python 3.6
so I'm using the following AMI amzn-ami-hvm-2018.03.0.20181129-x86_64-gp2
(see here). Once I SSH into my EC2, I download Chrome with:
sudo curl https://intoli.com/install-google-chrome.sh | bash
cp -r /opt/google/chrome/ /home/ec2-user/
google-chrome-stable --version
# Google Chrome 86.0.4240.198
And download and unzip the matching Chromedriver:
sudo wget https://chromedriver.storage.googleapis.com/86.0.4240.22/chromedriver_linux64.zip
sudo unzip chromedriver_linux64.zip
I install python36
and selenium
with:
sudo yum install python36 -y
sudo /usr/bin/pip-3.6 install selenium
Then run the script:
import os
import selenium
from selenium import webdriver
CURR_PATH = os.getcwd()
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--window-size=1280x1696')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--hide-scrollbars')
chrome_options.add_argument('--enable-logging')
chrome_options.add_argument('--log-level=0')
chrome_options.add_argument('--v=99')
chrome_options.add_argument('--single-process')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument('--remote-debugging-port=9222')
chrome_options.binary_location = f"{CURR_PATH}/chrome/google-chrome"
driver = webdriver.Chrome(
executable_path = f"{CURR_PATH}/chromedriver",
chrome_options=chrome_options
)
driver.get("https://www.google.com/")
html = driver.page_source
print(html)
This works
Lambda
I then zip my chromedriver and Chrome files:
mkdir tmp
mv chromedriver tmp
mv chrome tmp
cd tmp
zip -r9 ../chrome.zip chromedriver chrome
And copy the zipped file to an S3
bucket
This is my lambda function:
import os
import boto3
from botocore.exceptions import ClientError
import zipfile
import selenium
from selenium import webdriver
s3 = boto3.resource('s3')
def handler(event, context):
chrome_bucket = os.environ.get('CHROME_S3_BUCKET')
chrome_key = os.environ.get('CHROME_S3_KEY')
# DOWNLOAD HEADLESS CHROME FROM S3
try:
# with open('/tmp/headless_chrome.zip', 'wb') as data:
s3.meta.client.download_file(chrome_bucket, chrome_key, '/tmp/chrome.zip')
print(os.listdir('/tmp'))
except ClientError as e:
raise e
# UNZIP HEADLESS CHROME
try:
with zipfile.ZipFile('/tmp/chrome.zip', 'r') as zip_ref:
zip_ref.extractall('/tmp')
# FREE UP SPACE
os.remove('/tmp/chrome.zip')
print(os.listdir('/tmp'))
except:
raise ValueError('Problem with unzipping Chrome executable')
# CHANGE PERMISSION OF CHROME
try:
os.chmod('/tmp/chromedriver', 0o775)
os.chmod('/tmp/chrome/chrome', 0o775)
os.chmod('/tmp/chrome/google-chrome', 0o775)
except:
raise ValueError('Problem with changing permissions to Chrome executable')
# GET LINKS
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--window-size=1280x1696')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--hide-scrollbars')
chrome_options.add_argument('--enable-logging')
chrome_options.add_argument('--log-level=0')
chrome_options.add_argument('--v=99')
chrome_options.add_argument('--single-process')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument('--remote-debugging-port=9222')
chrome_options.binary_location = "/tmp/chrome/google-chrome"
driver = webdriver.Chrome(
executable_path = "/tmp/chromedriver",
chrome_options=chrome_options
)
driver.get("https://www.google.com/")
html = driver.page_source
print(html)
I'm able to see my unzipped files in the /tmp
path.
And my error:
{
"errorMessage": "Message: unknown error: unable to discover open pages\n",
"errorType": "WebDriverException",
"stackTrace": [
[
"/var/task/lib/observer.py",
69,
"handler",
"chrome_options=chrome_options"
],
[
"/var/task/selenium/webdriver/chrome/webdriver.py",
81,
"__init__",
"desired_capabilities=desired_capabilities)"
],
[
"/var/task/selenium/webdriver/remote/webdriver.py",
157,
"__init__",
"self.start_session(capabilities, browser_profile)"
],
[
"/var/task/selenium/webdriver/remote/webdriver.py",
252,
"start_session",
"response = self.execute(Command.NEW_SESSION, parameters)"
],
[
"/var/task/selenium/webdriver/remote/webdriver.py",
321,
"execute",
"self.error_handler.check_response(response)"
],
[
"/var/task/selenium/webdriver/remote/errorhandler.py",
242,
"check_response",
"raise exception_class(message, screen, stacktrace)"
]
]
}
EDIT: I am willing to try out anything at this point. Different versions of Chrome or Chromium, Chromedriver, Python or Selenium.
from Selenium works on AWS EC2 but not on AWS Lambda
No comments:
Post a Comment