python实现网络请求与爬虫
- categories
- >
- python实现网络请求与爬虫
获取网页内容
import requests
# Fetch the page with a plain GET request and print the decoded body text.
url = 'https://www.example.com'
response = requests.get(url)
print(response.text)
解析 HTML 页面
from bs4 import BeautifulSoup
# Parse the HTML with the 'html.parser' backend.
# `response` is the Response object produced by an earlier requests call.
soup = BeautifulSoup(response.text, 'html.parser')
titles = soup.find_all('h1')
# Print the text of every <h1> element found in the document.
for title in titles:
    print(title.text)
下载图片
import requests
# Download the image. Raise on an HTTP error status so that an error page
# is never silently saved under an image filename.
img_response = requests.get('http://example.com/image.jpg')
img_response.raise_for_status()
img_data = img_response.content
# 'wb' because the payload is raw bytes, not text.
with open('image.jpg', 'wb') as handler:
    handler.write(img_data)
发送 HTTP POST 请求
import requests
# Send the key/value pairs as the body of a POST request (passed through
# the `data=` argument) and print what the server returned.
post_url = 'https://httpbin.org/post'
payload = dict(key1='value1', key2='value2')
response = requests.post(post_url, data=payload)
print(response.text)
处理 JSON 响应
import requests
# Request the endpoint, decode the JSON body into Python objects, and
# print the decoded result.
api_url = 'https://api.example.com/data'
response = requests.get(api_url)
data = response.json()
print(data)
设置超时时间
import requests
# timeout=5 limits how long requests waits on the server (in seconds);
# requests.Timeout is raised when that limit is exceeded.
try:
    response = requests.get('https://www.example.com', timeout=5)
except requests.Timeout:
    print("The request timed out")
处理异常
import requests
# Issue the request and turn 4xx/5xx responses into exceptions, then report
# HTTP errors separately from every other request failure.
try:
    response = requests.get('https://www.example.com')
    # raise_for_status() raises requests.HTTPError for error status codes.
    response.raise_for_status()
except requests.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except requests.RequestException as err:
    # RequestException is the base class of the requests exceptions
    # (connection errors, timeouts, ...); catching it instead of a bare
    # Exception keeps unrelated programming errors visible.
    print(f"Other error occurred: {err}")
使用会话保持连接
import requests
# Use the Session as a context manager so it is closed (and its pooled
# connections released) as soon as the block exits, even on error.
with requests.Session() as session:
    response = session.get('https://www.example.com')
    print(response.text)
获取响应头信息
import requests
# Fetch the page and print the headers the server sent back.
url = 'https://www.example.com'
response = requests.get(url)
print(response.headers)
设置自定义请求头
import requests
# Send the request with a custom User-Agent header and print the body text.
url = 'https://www.example.com'
headers = {'User-Agent': 'MyApp/1.0'}
response = requests.get(url, headers=headers)
print(response.text)
comment:
- Valine
- LiveRe
- ChangYan