docker로 크롤링 실행
Dockerfile
# Image that installs Google Chrome, a chromedriver build and Selenium,
# then runs test.py as the container command.
FROM python:3.11.4
#FROM python:3.8.8
WORKDIR /root

# Make a RUN step fail when any command of a pipeline fails; without this a
# failed key download below would be masked by the command it is piped into.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# chromedriver build to download; override with --build-arg CHROMEDRIVER_VERSION=...
ARG CHROMEDRIVER_VERSION=116.0.5845.96

# Tools needed by the download/extract steps below. They are installed before
# first use, and update + install share one layer so the package lists can
# never come from a stale cached layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      gnupg \
      unzip \
      wget \
 && rm -rf /var/lib/apt/lists/*

# install google chrome
# The repository key is stored in a dedicated keyring referenced via signed-by
# instead of being added with the deprecated apt-key.
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub \
      | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
 && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" \
      > /etc/apt/sources.list.d/google-chrome.list \
 && apt-get update \
 && apt-get install -y google-chrome-stable \
 && rm -rf /var/lib/apt/lists/*

# install chromedriver
#RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
#RUN unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
# NOTE(review): the binary is extracted to
# /usr/local/bin/chromedriver-linux64/chromedriver, which is not on PATH, and
# its version is pinned while google-chrome-stable is not. The location is kept
# as-is on purpose; confirm whether the script is expected to pick this binary
# up before moving it onto PATH, since a driver/browser mismatch would then
# break the run.
RUN wget -q -O /tmp/chromedriver.zip \
      "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROMEDRIVER_VERSION}/linux64/chromedriver-linux64.zip" \
 && unzip /tmp/chromedriver.zip chromedriver-linux64/chromedriver -d /usr/local/bin/ \
 && rm /tmp/chromedriver.zip

# set display port to avoid crash
ENV DISPLAY=:99

# install selenium
RUN pip install --no-cache-dir \
      selenium==4.12.0 \
      webdriver_manager

# COPY is sufficient for a plain local file (ADD is only needed for tar
# extraction or remote sources).
COPY test.py .
CMD ["python3", "test.py"]
test.py
from selenium import webdriver
from selenium.webdriver.common.by import By
# Smoke test: start headless Chrome, open the Google image search page,
# save a screenshot and look up one element by CSS selector.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--window-size=1920,1080')
chrome_options.add_argument('--headless')  # run without a visible browser window
chrome_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=chrome_options)
try:
    print("process1")
    URL ='https://www.google.co.kr/imghp'
    driver.get(url=URL)
    print("process3")
    # Element lookups below retry for up to 10 seconds before failing.
    driver.implicitly_wait(time_to_wait=10)
    print("process4")
    screenshot = driver.save_screenshot('test.png')
    # Distinct label: the original printed "process4" twice, which made the
    # two steps indistinguishable in the log.
    print("process5")
    # NOTE(review): "#APjFqb" is presumably the search input on the page —
    # confirm; the lookup raises if the element is not found.
    elem = driver.find_element(By.CSS_SELECTOR,"#APjFqb")
finally:
    # Always shut the browser down, even when one of the steps above raises,
    # so no Chrome/chromedriver process is left behind.
    driver.quit()
print("Success")
# Disabled draft: imports from an earlier version of this script. The text is
# a bare string literal (not assigned to anything), so none of it executes.
'''
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
print("process1")
#driver = webdriver.Chrome()
'''
# Disabled draft: several alternative ways of constructing the Chrome driver
# (explicit Service path, executable_path/chrome_options keywords, default
# options). Bare string literal, never executed.
# NOTE(review): the executable_path=/chrome_options= keyword form is presumably
# from an older Selenium API — confirm against the installed version before
# reviving it.
'''
service = Service(executable_path='/usr/lib/chromium-browser/chromedriver')
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=service, options=options)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(executable_path="/home/streamsets/crawlingExe/chromedriver",chrome_options=chrome_options)
option = webdriver.ChromeOptions()
driver = webdriver.Chrome(options = option)
'''
# Disabled draft: the same headless-Chrome flow as the active script in this
# file, but without the screenshot step and with a "process2" progress print
# after the driver is created. Bare string literal, never executed.
'''
print("process1")
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--window-size=1920,1080')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=chrome_options)
print("process2")
URL ='https://www.google.co.kr/imghp'
driver.get(url=URL)
print("process3")
driver.implicitly_wait(time_to_wait=10)
print("process4")
elem = driver.find_element(By.CSS_SELECTOR,"#APjFqb")
print("Success")
driver.quit()
'''