HTTP编程:requests库、响应处理与Session管理
HTTP编程:requests库、响应处理与Session管理
HTTP(超文本传输协议)是Web的基础协议。Python的requests库是目前最流行的HTTP客户端库,提供了简洁优雅的API。本文将全面介绍HTTP编程的各个方面。
安装requests
pip install requests
发送基本请求
requests库支持所有HTTP方法,使用非常直观:
import requests
# GET request. Every call passes a timeout: without one, requests waits
# indefinitely when the server stops responding.
response = requests.get("https://httpbin.org/get", timeout=10)
print(f"状态码: {response.status_code}")
print(f"响应头: {response.headers}")
print(f"响应内容: {response.text[:100]}")

# POST request: ``data=`` sends the dict as a form-encoded body.
data = {"username": "admin", "password": "123456"}
response = requests.post("https://httpbin.org/post", data=data, timeout=10)
print(response.json())

# The remaining HTTP methods follow the same calling convention.
response = requests.put("https://httpbin.org/put", data={"key": "value"}, timeout=10)
response = requests.delete("https://httpbin.org/delete", timeout=10)
response = requests.patch("https://httpbin.org/patch", data={"key": "value"}, timeout=10)
响应对象详解
Response对象包含服务器返回的所有信息:
import requests
# A timeout keeps the example from hanging if the server never answers.
response = requests.get("https://httpbin.org/get", timeout=10)

# Status
print(f"状态码: {response.status_code}")
print(f"是否成功: {response.ok}")

# Body, in three views
print(f"文本内容: {response.text}")  # decoded to str automatically
print(f"字节内容: {response.content[:50]}")  # raw bytes
print(f"JSON解析: {response.json()}")  # parsed JSON

# Response headers
print(f"Content-Type: {response.headers['Content-Type']}")
print(f"服务器: {response.headers.get('Server')}")

# URL information
print(f"请求URL: {response.url}")
print(f"重定向历史: {response.history}")

# Encoding used to decode ``response.text``
print(f"编码: {response.encoding}")
请求参数与头信息
import requests
# Query-string parameters: requests encodes the dict into the URL.
params = {"q": "python", "page": 1, "per_page": 10}
response = requests.get("https://httpbin.org/get", params=params, timeout=10)
print(f"实际URL: {response.url}")

# Custom request headers.
headers = {
    "User-Agent": "MyApp/1.0",
    "Authorization": "Bearer token123",
    "Accept": "application/json",
    "Content-Type": "application/json",
}
response = requests.get("https://httpbin.org/headers", headers=headers, timeout=10)
print(response.json())

# Send JSON: ``json=`` serialises the dict as the request body.
json_data = {
    "name": "张三",
    "age": 25,
    "email": "zhangsan@example.com",
}
response = requests.post("https://httpbin.org/post", json=json_data, timeout=10)
print(response.json())
超时与异常处理
import requests
from requests.exceptions import (
Timeout,
ConnectionError,
HTTPError,
RequestException
)
def safe_request(url, method="GET", **kwargs):
    """Send an HTTP request and return the response, or ``None`` on failure.

    Args:
        url: Target URL.
        method: HTTP method name handed to ``requests.request``.
        **kwargs: Extra keyword arguments forwarded to ``requests.request``.
            When ``timeout`` is absent it defaults to ``(5, 10)``
            (5 s connect, 10 s read).

    Returns:
        The response when the request succeeded and the status check
        passed; ``None`` when any requests exception was raised (a message
        is printed in that case).
    """
    # Only fill in the timeout if the caller did not choose one.
    if "timeout" not in kwargs:
        kwargs["timeout"] = (5, 10)

    try:
        response = requests.request(method, url, **kwargs)
        response.raise_for_status()  # turns an error status into HTTPError
    except Timeout:
        print(f"请求超时: {url}")
    except ConnectionError:
        # This is the ConnectionError imported from requests.exceptions.
        print(f"连接错误: {url}")
    except HTTPError as e:
        print(f"HTTP错误 {e.response.status_code}: {url}")
    except RequestException as e:
        # Catch-all for any other requests failure.
        print(f"请求异常: {e}")
    else:
        return response
    return None
# Usage example: safe_request returns None on any failure, so check for
# that explicitly before touching the body.
response = safe_request("https://httpbin.org/get")
if response is not None:
    print(response.json())
文件上传与下载
import requests
# Upload a single file. The with-block guarantees the file handle is closed
# once the request has been sent, even if the request raises.
with open("report.pdf", "rb") as pdf:
    files = {
        "file": ("report.pdf", pdf, "application/pdf"),
    }
    response = requests.post("https://httpbin.org/post", files=files, timeout=30)

# Upload several files under the same form field name; in-memory bytes
# need no cleanup.
files = [
    ("files", ("file1.txt", b"file1 content")),
    ("files", ("file2.txt", b"file2 content")),
]
response = requests.post("https://httpbin.org/post", files=files, timeout=30)
# 下载文件(流式下载)
def download_file(url, filename, chunk_size=8192, timeout=(5, 30)):
    """Stream the body of *url* into *filename* without loading it in memory.

    Args:
        url: URL to download.
        filename: Path of the local file to write (opened in binary mode).
        chunk_size: Number of bytes requested per chunk from the response.
        timeout: Timeout in seconds passed to ``requests.get``; either a
            single number or a ``(connect, read)`` tuple.

    Raises:
        requests.HTTPError: If the server answers with an error status;
            the local file is not opened in that case.
    """
    # With stream=True the connection stays open until the response is
    # closed, so use the response as a context manager to release it.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    print(f"文件已下载: {filename}")
# Stream a large file to disk. The response is used as a context manager so
# the connection is released, and the status is checked first so an error
# page is never written to image.png.
with requests.get(
    "https://httpbin.org/image/png", stream=True, timeout=(5, 30)
) as response:
    response.raise_for_status()
    with open("image.png", "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            f.write(chunk)
Session会话管理
Session对象可以在多个请求之间保持某些参数(如Cookie、请求头、认证信息),并复用底层连接以减少重复建立连接的开销:
import requests
# Create the session in a with-block so it is closed even if one of the
# requests below raises.
with requests.Session() as session:
    # Defaults applied to every request sent through this session.
    session.headers.update({
        "User-Agent": "MyApp/1.0",
        "Accept": "application/json",
    })

    # Cookies set by one response are stored on the session and sent
    # automatically with later requests. A session has no default
    # timeout, so it is passed on each call.
    response1 = session.get(
        "https://httpbin.org/cookies/set/token/abc123", timeout=10
    )
    print(f"Cookie: {session.cookies.get_dict()}")
    response2 = session.get("https://httpbin.org/cookies", timeout=10)
    print(f"带Cookie的请求: {response2.json()}")

    # Redirects are followed automatically; each hop is kept in .history.
    response = session.get("https://httpbin.org/redirect/3", timeout=10)
    print(f"最终URL: {response.url}")
    print(f"重定向次数: {len(response.history)}")
认证方式
import requests
from requests.auth import HTTPBasicAuth, HTTPDigestAuth, AuthBase
# HTTP Basic authentication with an explicit auth object. A timeout is set
# on every call so a stalled server cannot hang the script.
response = requests.get(
    "https://httpbin.org/basic-auth/user/pass",
    auth=HTTPBasicAuth("user", "pass"),
    timeout=10,
)

# Shorthand: a (username, password) tuple is passed as ``auth`` instead.
response = requests.get(
    "https://httpbin.org/basic-auth/user/pass",
    auth=("user", "pass"),
    timeout=10,
)

# HTTP Digest authentication.
response = requests.get(
    "https://httpbin.org/digest-auth/auth/user/pass",
    auth=HTTPDigestAuth("user", "pass"),
    timeout=10,
)
# 自定义认证类
class TokenAuth(AuthBase):
    """Auth hook that stamps a bearer token onto each outgoing request."""

    def __init__(self, token):
        # Stored as given; formatted into the header when a request is signed.
        self.token = token

    def __call__(self, r):
        """Set the ``Authorization`` header on *r* and hand it back."""
        r.headers.update({"Authorization": f"Bearer {self.token}"})
        return r
# Use the custom auth class. A timeout keeps the call from blocking forever
# if the server never responds.
response = requests.get(
    "https://httpbin.org/headers",
    auth=TokenAuth("my_secret_token"),
    timeout=10,
)
print(response.json())
代理与SSL配置
import requests
# Route traffic through a proxy; the dict key selects the proxy by URL scheme.
proxies = {
    "http": "http://proxy.example.com:8080",
    "https": "https://proxy.example.com:8443",
}
response = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)

# SOCKS proxy configuration. SOCKS support is an optional extra:
#   pip install "requests[socks]"
proxies = {
    "http": "socks5://127.0.0.1:1080",
    "https": "socks5://127.0.0.1:1080",
}

# TLS certificate verification.
response = requests.get("https://httpbin.org/get", verify=True, timeout=10)  # default

# WARNING: verify=False turns off certificate checking and exposes the
# connection to man-in-the-middle attacks. Use it only against hosts you
# control (e.g. local testing), never in production.
response = requests.get("https://httpbin.org/get", verify=False, timeout=10)

response = requests.get(
    "https://httpbin.org/get",
    cert=("cert.pem", "key.pem"),  # client certificate and private key
    timeout=10,
)
实战示例:API客户端封装
import requests
from typing import Optional, Dict, Any
class APIClient:
    """Small JSON API client that sends every call through one session.

    The session carries the bearer token and the ``Content-Type`` header,
    so each helper only supplies the endpoint and payload. The client can
    be used as a context manager to guarantee the session is closed.
    """

    def __init__(self, base_url: str, token: Optional[str] = None,
                 timeout: Any = (5, 10)):
        """Create the client and its underlying session.

        Args:
            base_url: Root URL of the API; trailing slashes are stripped.
            token: Optional bearer token sent in the ``Authorization``
                header of every request.
            timeout: Default timeout in seconds applied to every request,
                either a single number or a ``(connect, read)`` tuple.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()
        if token:
            self.session.headers["Authorization"] = f"Bearer {token}"
        self.session.headers["Content-Type"] = "application/json"

    def __enter__(self) -> "APIClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP method name.
            endpoint: Path relative to ``base_url``; a leading slash is
                optional.
            **kwargs: Forwarded to ``Session.request``. ``timeout`` falls
                back to the client's default when not given.

        Returns:
            The parsed JSON body, or an empty dict when the response has
            no body (e.g. ``204 No Content`` after a DELETE).

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        # A session has no default timeout, so apply the client's own.
        kwargs.setdefault("timeout", self.timeout)
        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()
        # Decoding an empty body as JSON would raise; report "no data" instead.
        if response.status_code == 204 or not response.content:
            return {}
        return response.json()

    def get(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
        """GET *endpoint* with optional query parameters."""
        return self._request("GET", endpoint, params=params)

    def post(self, endpoint: str, data: Optional[Dict] = None) -> Dict:
        """POST *data* to *endpoint* as a JSON body."""
        return self._request("POST", endpoint, json=data)

    def put(self, endpoint: str, data: Optional[Dict] = None) -> Dict:
        """PUT *data* to *endpoint* as a JSON body."""
        return self._request("PUT", endpoint, json=data)

    def delete(self, endpoint: str) -> Dict:
        """DELETE *endpoint*."""
        return self._request("DELETE", endpoint)

    def close(self):
        """Close the underlying session."""
        self.session.close()
# Usage example. try/finally guarantees the session is closed even when one
# of the calls raises (e.g. HTTPError from an error status).
client = APIClient("https://api.example.com", token="your_token")
try:
    users = client.get("/users", params={"page": 1})
    new_user = client.post(
        "/users", data={"name": "李四", "email": "lisi@example.com"}
    )
finally:
    client.close()
总结
requests库是Python HTTP编程的首选工具。掌握请求发送、响应处理、Session管理和异常处理,你就能轻松调用各种Web API,构建强大的网络应用。记住始终设置超时、处理异常,并考虑使用Session来保持会话状态。