Python 爬虫:爬取豆瓣 Top250 的书籍信息,并保存到文件
import requests
from bs4 import BeautifulSoup
# Download the first page (start=0) of the Douban book Top 250 listing when the
# module is loaded, then parse the returned markup with the lxml parser.
# NOTE(review): unlike get_html, this request sends no custom User-Agent header.
resp = requests.get(url='https://book.douban.com/top250?start=0')
soup = BeautifulSoup(markup=resp.text, features='lxml')
def get_html(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        The decoded body of the response (``Response.text``).

    Raises:
        requests.exceptions.Timeout: The server did not answer within
            ``timeout`` seconds.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status code.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole crawl.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error statuses instead of handing an error page to the
    # HTML parser, where it would silently produce no results.
    response.raise_for_status()
    return response.text
def all_page(total=250, page_size=25):
    """Build the URLs of every page of the Douban book Top 250 listing.

    Each page is addressed by the ``start`` query parameter, which holds the
    offset of the first entry on that page.

    Args:
        total: Number of entries in the listing; offsets stop before this
            value. Defaults to 250.
        page_size: Number of entries per page, i.e. the step between
            consecutive offsets. Defaults to 25.

    Returns:
        A list of URL strings, one per page, in ascending offset order.
        With the defaults this is 10 URLs (offsets 0, 25, ..., 225).

    Raises:
        ValueError: If ``page_size`` is not positive.
    """
    if page_size <= 0:
        # A zero step makes range() fail and a negative one silently yields
        # no pages; reject both with a clear message.
        raise ValueError('page_size must be a positive integer')
    base_url = 'https://book.douban.com/top250?start='
    return [base_url + str(offset) for offset in range(0, total, page_size)]
def html_parse():
for url in all_page():
soup = BeautifulSoup(get_html(url), 'lxml')
alldiv = soup.find_all('div', class_='pl2')
name
python
运行