import requests
from bs4 import BeautifulSoup
import time
# PTT index pages show dates like "6/07" (no leading zero on the month),
# so strip it from today's date before comparing.
today = time.strftime('%m/%d').lstrip('0')

def ptt(url):
    resp = requests.get(url)
    if resp.status_code != 200:
        print('URL error: ' + url)
        return
    soup = BeautifulSoup(resp.text, 'html5lib')
    # Link to the previous (older) index page, taken from the paging buttons.
    paging = soup.find('div', 'btn-group btn-group-paging').find_all('a')[1]['href']
    articles = []
    rents = soup.find_all('div', 'r-ent')
    for rent in rents:
        title = rent.find('div', 'title').text.strip()
        count = rent.find('div', 'nrec').text.strip()
        date = rent.find('div', 'meta').find('div', 'date').text.strip()
        article = '%s %s:%s' % (date, count, title)
        try:
            # Keep today's articles with more than 10 pushes.
            if today == date and int(count) > 10:
                articles.append(article)
        except ValueError:
            # Push count is not numeric, e.g. '爆' (100 or more pushes).
            if today == date and count == '爆':
                articles.append(article)

    if articles:
        for article in articles:
            print(article)
        # Keep crawling older index pages until no article matches.
        ptt('https://www.ptt.cc' + paging)

ptt('https://www.ptt.cc/bbs/Stock/index.html')
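The same function should work for other boards by changing the starting index URL. A minimal sketch, assuming age-restricted boards such as Gossiping need the over18=1 cookie; the fetch_index helper is hypothetical, not part of the script above:

import requests
from bs4 import BeautifulSoup

def fetch_index(url, over18=False):
    # Hypothetical helper: send the over18=1 cookie for age-restricted boards.
    cookies = {'over18': '1'} if over18 else {}
    resp = requests.get(url, cookies=cookies)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, 'html5lib')

soup = fetch_index('https://www.ptt.cc/bbs/Gossiping/index.html', over18=True)
print(len(soup.find_all('div', 'r-ent')))  # number of article rows on the page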
Sunday, June 7, 2020
beautifulsoup crawl ptt stock
Crawl articles from the PTT Stock board.
Labels: automation, BeautifulSoup, python