https://chromedriver.chromium.org/downloads
Firefox driver: geckodriver
https://github.com/mozilla/geckodriver/releases
Basic Selenium
from selenium import webdriver

# Minimal example: start Chrome via the chromedriver binary, open a page,
# then shut the browser down.
browser = webdriver.Chrome('D:\\chromedriver.exe')
try:
    browser.get('http://google.com')
finally:
    # Quit even when navigation fails so no chrome/chromedriver process
    # is left running.
    browser.quit()
Selenium with BeautifulSoup, example 1: opens a visible Chrome window
from selenium import webdriver
from bs4 import BeautifulSoup

# Start the driver outside the try block: if start-up itself fails there is
# nothing to quit, and the real error is not masked by a NameError in finally.
chrome = webdriver.Chrome(executable_path='D:\\CHROME_DRIVER\\chromedriver.exe')
try:
    # Abort page loads that take longer than 10 seconds.
    chrome.set_page_load_timeout(10)
    chrome.get('https://code-gym.github.io/spider_demo/')
    # Hand the rendered HTML to BeautifulSoup for parsing.
    soup = BeautifulSoup(chrome.page_source, 'html5lib')
    print(soup.find('h1').text)
finally:
    # Quit the driver this example created (it is bound to `chrome`).
    chrome.quit()
Selenium with BeautifulSoup, example 2: runs Chrome headless (in the background, no visible window)
from selenium import webdriver
from bs4 import BeautifulSoup

# Run Chrome without a visible window.
options = webdriver.ChromeOptions()
options.add_argument('--headless')

# Start the driver outside the try block: if start-up itself fails there is
# nothing to quit, and the real error is not masked by a NameError in finally.
chrome = webdriver.Chrome(options=options, executable_path='D:\\CHROME_DRIVER\\chromedriver.exe')
try:
    # Abort page loads that take longer than 10 seconds.
    chrome.set_page_load_timeout(10)
    chrome.get('https://code-gym.github.io/spider_demo/')
    # Hand the rendered HTML to BeautifulSoup for parsing.
    soup = BeautifulSoup(chrome.page_source, 'html5lib')
    print(soup.find('h1').text)
finally:
    # Quit the driver this example created (it is bound to `chrome`).
    chrome.quit()
Selenium with BeautifulSoup, using XPath to find and open a related article
from selenium import webdriver
from bs4 import BeautifulSoup

# NOTE(review): the original post elided the setup lines with dots; they are
# reconstructed here on the assumption that they repeat the headless example
# (same options, driver path, timeout and start URL) -- confirm.
options = webdriver.ChromeOptions()
options.add_argument('--headless')

# Start the driver outside the try block: if start-up itself fails there is
# nothing to quit, and the real error is not masked by a NameError in finally.
chrome = webdriver.Chrome(options=options, executable_path='D:\\CHROME_DRIVER\\chromedriver.exe')
try:
    chrome.set_page_load_timeout(10)
    chrome.get('https://code-gym.github.io/spider_demo/')
    soup = BeautifulSoup(chrome.page_source, 'html5lib')
    print(soup.find('h1').text)

    # Click the link located by an absolute XPath, then print the <h1>
    # found under the element with id "post-header" on the resulting page.
    # NOTE(review): find_element_by_xpath is the Selenium 3 style API used
    # throughout this post; newer Selenium releases expect
    # find_element(By.XPATH, ...) instead -- confirm against the installed version.
    chrome.find_element_by_xpath('/html/body/div[2]/div/div[1]/div[1]/div/div/h3/a').click()
    print(chrome.find_element_by_xpath('//*[@id="post-header"]/div[2]/div/div/h1').text)
finally:
    # Quit the driver this example created (it is bound to `chrome`).
    chrome.quit()
No comments:
Post a Comment