← 返回首页
📡

HTTP编程:requests库、响应处理与Session管理

📂 python ⏱ 3 min 530 words

HTTP编程:requests库、响应处理与Session管理

HTTP(超文本传输协议)是Web的基础协议。Python的requests库是目前最流行的HTTP客户端库,提供了简洁优雅的API。本文将全面介绍HTTP编程的各个方面。

安装requests

pip install requests

发送基本请求

requests库支持所有HTTP方法,使用非常直观:

import requests

# GET request. Always pass a timeout: without one, requests may wait
# indefinitely on a server that never answers.
response = requests.get("https://httpbin.org/get", timeout=10)
print(f"状态码: {response.status_code}")
print(f"响应头: {response.headers}")
print(f"响应内容: {response.text[:100]}")

# POST request; a dict passed as data= is sent as a form body
data = {"username": "admin", "password": "123456"}
response = requests.post("https://httpbin.org/post", data=data, timeout=10)
print(response.json())

# Other HTTP methods
response = requests.put("https://httpbin.org/put", data={"key": "value"}, timeout=10)
response = requests.delete("https://httpbin.org/delete", timeout=10)
response = requests.patch("https://httpbin.org/patch", data={"key": "value"}, timeout=10)

响应对象详解

Response对象包含服务器返回的所有信息:

import requests

# A timeout keeps the example from hanging if the server never answers.
response = requests.get("https://httpbin.org/get", timeout=10)

# Status
print(f"状态码: {response.status_code}")
print(f"是否成功: {response.ok}")

# Body
print(f"文本内容: {response.text}")          # decoded text
print(f"字节内容: {response.content[:50]}")   # raw bytes
print(f"JSON解析: {response.json()}")         # body parsed as JSON

# Response headers
print(f"Content-Type: {response.headers['Content-Type']}")
print(f"服务器: {response.headers.get('Server')}")  # .get() avoids KeyError if absent

# URL information
print(f"请求URL: {response.url}")
print(f"重定向历史: {response.history}")

# Encoding used to decode response.text
print(f"编码: {response.encoding}")

请求参数与头信息

import requests

# Query-string parameters; requests encodes them into the URL
params = {"q": "python", "page": 1, "per_page": 10}
response = requests.get("https://httpbin.org/get", params=params, timeout=10)
print(f"实际URL: {response.url}")

# Custom request headers
headers = {
    "User-Agent": "MyApp/1.0",
    "Authorization": "Bearer token123",
    "Accept": "application/json",
    "Content-Type": "application/json"
}
response = requests.get("https://httpbin.org/headers", headers=headers, timeout=10)
print(response.json())

# Send a JSON body: json= serializes the dict for us
json_data = {
    "name": "张三",
    "age": 25,
    "email": "zhangsan@example.com"
}
response = requests.post("https://httpbin.org/post", json=json_data, timeout=10)
print(response.json())

超时与异常处理

import requests
from requests.exceptions import (
    Timeout,
    ConnectionError,
    HTTPError,
    RequestException
)

def safe_request(url, method="GET", **kwargs):
    """Send an HTTP request and return the response, or None on failure.

    Unless the caller supplies ``timeout``, a (connect, read) timeout of
    (5, 10) seconds is applied. Remaining keyword arguments are passed
    straight to ``requests.request``. Any requests exception, including
    the HTTP error raised by ``raise_for_status()``, is reported with
    ``print`` and swallowed.
    """
    kwargs.setdefault("timeout", (5, 10))

    try:
        resp = requests.request(method, url, **kwargs)
        resp.raise_for_status()
    except Timeout:
        # Listed before ConnectionError so timeouts get their own message.
        failure = f"请求超时: {url}"
    except ConnectionError:
        failure = f"连接错误: {url}"
    except HTTPError as err:
        failure = f"HTTP错误 {err.response.status_code}: {url}"
    except RequestException as err:
        failure = f"请求异常: {err}"
    else:
        return resp

    print(failure)
    return None

# Usage example: safe_request yields None on failure, so check before use
fetched = safe_request("https://httpbin.org/get")
if fetched:
    body = fetched.json()
    print(body)

文件上传与下载

import requests

# Upload a single file. The with-block closes the file handle once the
# request has finished, even if the request raises.
with open("report.pdf", "rb") as report:
    files = {
        "file": ("report.pdf", report, "application/pdf")
    }
    response = requests.post("https://httpbin.org/post", files=files, timeout=30)

# Upload several in-memory files under the same form field name
files = [
    ("files", ("file1.txt", b"file1 content")),
    ("files", ("file2.txt", b"file2 content")),
]
response = requests.post("https://httpbin.org/post", files=files, timeout=30)

# 下载文件(流式下载)
def download_file(url, filename, chunk_size=8192, timeout=(5, 10)):
    """Stream the body of ``url`` into ``filename`` without buffering it all.

    Args:
        url: Address to download from.
        filename: Path of the local file to write (opened in binary mode).
        chunk_size: Number of bytes requested per ``iter_content`` chunk.
        timeout: Value passed to ``requests.get`` as ``timeout``; a
            (connect, read) tuple in seconds by default.

    Raises:
        requests.exceptions.HTTPError: if the server answers with an
            error status (raised by ``raise_for_status()`` before the
            local file is opened).
    """
    # Using the response as a context manager releases the connection
    # even when writing to disk fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # skip empty chunks
                    f.write(chunk)

    print(f"文件已下载: {filename}")

# Stream a large file to disk. The response is closed by the with-block,
# and raise_for_status() stops an error page being saved as image.png.
with requests.get("https://httpbin.org/image/png", stream=True, timeout=(5, 10)) as response:
    response.raise_for_status()
    with open("image.png", "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            f.write(chunk)

Session会话管理

Session对象可以在多个请求之间保持某些参数(如Cookie、认证信息):

import requests

# Create the Session as a context manager so it is closed even if one of
# the requests below raises.
with requests.Session() as session:
    # Session-level defaults sent with every request
    session.headers.update({
        "User-Agent": "MyApp/1.0",
        "Accept": "application/json"
    })

    # Requests made through the session share its cookie jar.
    # The timeout is passed on each call rather than set on the session.
    response1 = session.get("https://httpbin.org/cookies/set/token/abc123", timeout=10)
    print(f"Cookie: {session.cookies.get_dict()}")

    response2 = session.get("https://httpbin.org/cookies", timeout=10)
    print(f"带Cookie的请求: {response2.json()}")

    # Redirects are followed; the intermediate responses are in .history
    response = session.get("https://httpbin.org/redirect/3", timeout=10)
    print(f"最终URL: {response.url}")
    print(f"重定向次数: {len(response.history)}")

认证方式

import requests
from requests.auth import HTTPBasicAuth, HTTPDigestAuth, AuthBase

# Basic authentication (timeout added so the call cannot hang forever)
response = requests.get(
    "https://httpbin.org/basic-auth/user/pass",
    auth=HTTPBasicAuth("user", "pass"),
    timeout=10,
)

# Same request, passing the credentials as a plain tuple
response = requests.get(
    "https://httpbin.org/basic-auth/user/pass",
    auth=("user", "pass"),
    timeout=10,
)

# Digest authentication
response = requests.get(
    "https://httpbin.org/digest-auth/auth/user/pass",
    auth=HTTPDigestAuth("user", "pass"),
    timeout=10,
)

# Custom authentication class
class TokenAuth(AuthBase):
    """Auth hook that adds a bearer token to each request it is called with."""

    def __init__(self, token):
        self.token = token

    def _header_value(self):
        """Return the value written to the Authorization header."""
        return f"Bearer {self.token}"

    def __call__(self, r):
        # Set the header on the request object, then hand the same
        # object back to the caller.
        r.headers["Authorization"] = self._header_value()
        return r

# Use the custom auth class; the timeout keeps the call from hanging
response = requests.get(
    "https://httpbin.org/headers",
    auth=TokenAuth("my_secret_token"),
    timeout=10,
)
print(response.json())

代理与SSL配置

import requests

# Route traffic through a proxy, keyed by the scheme of the target URL
proxies = {
    "http": "http://proxy.example.com:8080",
    "https": "https://proxy.example.com:8443"
}
response = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)

# SOCKS proxy configuration (defined here but not used by the calls below).
# NOTE: SOCKS support is an optional extra of requests
# (`pip install requests[socks]`) — see the requests proxy documentation.
proxies = {
    "http": "socks5://127.0.0.1:1080",
    "https": "socks5://127.0.0.1:1080"
}

# TLS certificate verification
response = requests.get("https://httpbin.org/get", verify=True, timeout=10)  # the default
# WARNING: verify=False turns certificate checking off, which removes
# protection against man-in-the-middle attacks. Local testing only.
response = requests.get("https://httpbin.org/get", verify=False, timeout=10)
response = requests.get(
    "https://httpbin.org/get",
    cert=("cert.pem", "key.pem"),  # client certificate and private key
    timeout=10,
)

实战示例:API客户端封装

import requests
from typing import Optional, Dict, Any

class APIClient:
    """Small JSON API client that sends every call through one Session.

    The client can be used as a context manager so the underlying
    session is closed even when a request raises::

        with APIClient("https://api.example.com", token="...") as client:
            client.get("/users")
    """

    def __init__(self, base_url: str, token: Optional[str] = None,
                 timeout: Any = (5, 10)):
        """Create the session and set its default headers.

        Args:
            base_url: Root URL of the API; a trailing slash is stripped.
            token: Optional bearer token sent in the Authorization header.
            timeout: Default ``timeout`` passed to every request unless
                the call overrides it; a (connect, read) tuple in seconds.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()

        if token:
            self.session.headers["Authorization"] = f"Bearer {token}"

        self.session.headers["Content-Type"] = "application/json"

    def __enter__(self) -> "APIClient":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
        """Send one request and return the decoded JSON body.

        Returns an empty dict when the response has no body (for example
        204 No Content), instead of failing while decoding JSON.

        Raises:
            requests.exceptions.HTTPError: on an error status, via
                ``raise_for_status()``.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        # Without a timeout a stalled server would block the caller forever.
        kwargs.setdefault("timeout", self.timeout)
        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()
        if response.status_code == 204 or not response.content:
            return {}
        return response.json()

    def get(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
        """GET ``endpoint`` with optional query parameters."""
        return self._request("GET", endpoint, params=params)

    def post(self, endpoint: str, data: Optional[Dict] = None) -> Dict:
        """POST ``data`` to ``endpoint`` as a JSON body."""
        return self._request("POST", endpoint, json=data)

    def put(self, endpoint: str, data: Optional[Dict] = None) -> Dict:
        """PUT ``data`` to ``endpoint`` as a JSON body."""
        return self._request("PUT", endpoint, json=data)

    def delete(self, endpoint: str) -> Dict:
        """DELETE ``endpoint``."""
        return self._request("DELETE", endpoint)

    def close(self):
        """Close the underlying session."""
        self.session.close()

# Usage example: try/finally closes the session even if a call raises
client = APIClient("https://api.example.com", token="your_token")
try:
    users = client.get("/users", params={"page": 1})
    new_user = client.post("/users", data={"name": "李四", "email": "lisi@example.com"})
finally:
    client.close()

总结

requests库是Python HTTP编程的首选工具。掌握请求发送、响应处理、Session管理和异常处理,你就能轻松调用各种Web API,构建强大的网络应用。记住始终设置超时、处理异常,并考虑使用Session来保持会话状态。