在爬取數據的過程中,我們可能需要將數據存儲到本地文件或數據庫中。這裡我們以存儲到本地文件為例:

import csv

import requests
from bs4 import BeautifulSoup

# Fetch a page through an HTTP(S) proxy, extract every <h2> heading, and
# save the headings to a local CSV file.

# Address of the page to scrape.
url = "http://example.com"

# Proxy used for both plain-HTTP and HTTPS traffic; replace the placeholder
# with a real proxy IP address and port before running.
proxies = {
    "http": "http://your_proxy_ip:port",
    "https": "http://your_proxy_ip:port",
}

# A timeout keeps the script from hanging forever on an unresponsive proxy,
# and raise_for_status() stops an HTTP error page from being scraped as data.
response = requests.get(url, proxies=proxies, timeout=10)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')

# Text content of every <h2> tag in the document.
titles = [tag.text for tag in soup.find_all('h2')]

# newline='' lets the csv module manage line endings itself, and the explicit
# encoding avoids failures on platforms whose default encoding cannot
# represent the scraped text. Every write must stay inside the `with` block,
# because the file is closed as soon as the block exits.
with open('titles.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Title'])  # header row
    writer.writerows([title] for title in titles)  # one data row per heading