import requests
from bs4 import BeautifulSoup
import time

# URL of the Jianshu article to fetch; replace with the actual article URL.
article_url = 'https://www.jianshu.com/p/796961eeb1bc'

# Request headers that imitate a desktop browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def main():
    """Fetch the article page and print its HTML and extracted body text.

    Sends a GET request to ``article_url`` using ``headers``. On an HTTP 200
    response the whole parsed document is printed (to help locate the right
    selector), followed by the text of the first ``div`` whose class is
    ``_gp-ck``. A message is printed instead when the request fails, when
    the status code is not 200, or when the content element is not found.
    """
    try:
        response = requests.get(
            article_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported rather than raised.
        print('请求失败,错误:', exc)
        return
    # Force UTF-8 decoding of the body instead of relying on the guessed
    # encoding.
    response.encoding = 'utf-8'

    # Short pause after the request (kept from the original script).
    time.sleep(1)

    # Anything other than 200 is treated as a failed request.
    if response.status_code != 200:
        print('请求失败,状态码:', response.status_code)
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # Dump the whole document so the correct selector can be looked up.
    print(soup.prettify())

    # Example selector; it may need updating to match the real page markup.
    article_content = soup.find('div', class_='_gp-ck')

    if article_content is None:
        print('无法找到文章内容元素')
    else:
        print(article_content.text)


if __name__ == '__main__':
    main()

# URL of the Jianshu article
# article_url = 'https://www.jianshu.com/p/796961eeb1bc'  # replace with the actual article URL

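# NOTE: the lines below are residue from the web page this script was copied
# from (a WordPress shortcode and the comment-form text); they are not part of
# the script and are kept only as comments.
# [wpstatistics stat=usersonline time=total]
#
# 发表回复 (Leave a reply)
#
# 您的邮箱地址不会被公开。 必填项已用 * 标注 (Your email address will not be published. Required fields are marked *)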