数据类
什么是数据类
Python 3.7引入了dataclass装饰器,用于自动创建__init__、__repr__、__eq__等特殊方法,简化类的定义。
from dataclasses import dataclass
@dataclass
class Person:
    """A simple person record.

    The ``@dataclass`` decorator generates these methods from the fields:

    * ``__init__(self, name, age, city)``
    * ``__repr__(self)``
    * ``__eq__(self, other)``
    """

    name: str
    age: int
    city: str


p1 = Person("Alice", 25, "北京")
p2 = Person("Bob", 30, "上海")
print(p1)  # Output: Person(name='Alice', age=25, city='北京')
print(p1 == p2)  # Output: False
字段配置
使用field()函数可以配置字段的默认值、默认工厂函数等。
from dataclasses import dataclass, field
from typing import List, Optional
@dataclass
class Student:
    """A student record whose optional attributes are configured via ``field()``."""

    name: str
    age: int
    # default_factory is called once per instance, so each student gets its
    # own list instead of sharing a single mutable default.
    grades: List[float] = field(default_factory=list)
    # The default is None, so the annotation has to admit None as well.
    email: Optional[str] = field(default=None)
    is_active: bool = field(default=True)


# Usage
s1 = Student("Alice", 20)
s2 = Student("Bob", 22, [90.0, 85.0], "bob@example.com")
print(s1)  # Output: Student(name='Alice', age=20, grades=[], email=None, is_active=True)
print(s2)  # Output: Student(name='Bob', age=22, grades=[90.0, 85.0], email='bob@example.com', is_active=True)
不可变数据类
使用frozen=True创建不可变的数据类。
@dataclass(frozen=True)
class Point:
    """An immutable 2-D point with integer coordinates."""

    x: int
    y: int


p1 = Point(1, 2)
p2 = Point(3, 4)
# Instances cannot be modified:
# p1.x = 10  # would raise FrozenInstanceError
# Frozen instances are hashable, so they can be used as dictionary keys.
points = {p1: "point1", p2: "point2"}
print(points[Point(1, 2)])  # Output: point1
排序和比较
from dataclasses import dataclass, field
from typing import List
@dataclass(order=True)
class Student:
    """A student record that sorts by age.

    ``order=True`` generates the comparison methods by comparing instances as
    tuples of their compare-enabled fields in declaration order. With ``name``
    declared first the sort would be alphabetical by name, so a derived
    ``sort_index`` field is declared first to make ``age`` the primary sort
    key (ties fall back to ``name``).
    """

    # Excluded from __init__ and __repr__, so Student(name, age, grades) and
    # the printed form stay unchanged; the value is set in __post_init__.
    sort_index: int = field(init=False, repr=False)
    name: str
    age: int
    # compare=False keeps grades out of both equality and ordering.
    grades: List[float] = field(default_factory=list, compare=False)

    def __post_init__(self) -> None:
        """Copy ``age`` into ``sort_index``.

        The copy is taken once at construction; it is not refreshed if
        ``age`` is reassigned afterwards.
        """
        self.sort_index = self.age


# Sort by age
students = [
    Student("Alice", 25),
    Student("Bob", 20),
    Student("Charlie", 22),
]
sorted_students = sorted(students)
for s in sorted_students:
    print(f"{s.name}: {s.age}")
# Output:
# Bob: 20
# Charlie: 22
# Alice: 25
继承
数据类支持继承,但有一些限制:子类的字段排在父类字段之后,因此当父类中已有带默认值的字段时,子类新增的字段也必须提供默认值,否则定义类时会抛出TypeError。
from dataclasses import dataclass, field
@dataclass
class Employee:
    """Base employee record; ``department`` falls back to "General"."""

    name: str
    salary: float
    department: str = "General"


@dataclass
class Manager(Employee):
    """An employee who leads a team.

    The inherited fields come first in the generated ``__init__``. Because
    ``Employee.department`` has a default, every field added here must have
    a default too.
    """

    team_size: int = 0
    reports: List[str] = field(default_factory=list)


# Usage
m = Manager("Alice", 50000, "Engineering", 5, ["Bob", "Charlie"])
print(m)  # Output: Manager(name='Alice', salary=50000, department='Engineering', team_size=5, reports=['Bob', 'Charlie'])
后处理(post_init)
使用__post_init__方法进行初始化后的验证或转换。
from dataclasses import dataclass
import math
@dataclass
class Circle:
    """A circle described by its radius, validated right after construction."""

    radius: float

    def __post_init__(self) -> None:
        """Reject a negative radius.

        Called by the generated ``__init__`` once the fields are assigned.

        Raises:
            ValueError: if ``radius`` is less than zero.
        """
        if self.radius < 0:
            raise ValueError("半径不能为负数")

    @property
    def area(self) -> float:
        """Return the area, ``pi * radius ** 2``."""
        return math.pi * self.radius ** 2

    @property
    def circumference(self) -> float:
        """Return the circumference, ``2 * pi * radius``."""
        return 2 * math.pi * self.radius


# Usage
c = Circle(5)
print(f"面积: {c.area:.2f}")  # Output: 面积: 78.54
print(f"周长: {c.circumference:.2f}")  # Output: 周长: 31.42
dataclass vs namedtuple
from collections import namedtuple
from dataclasses import dataclass
# The namedtuple way
PointTuple = namedtuple('PointTuple', ['x', 'y'])
p1 = PointTuple(1, 2)


# The dataclass way
@dataclass
class PointDataclass:
    """A 2-D point expressed as a dataclass, for comparison with PointTuple."""

    x: int
    y: int


p2 = PointDataclass(1, 2)
# Main differences:
# 1. namedtuple instances are immutable; dataclass instances are mutable
#    (unless frozen=True)
# 2. dataclass supports default values and type annotations
# 3. dataclass supports inheritance
# 4. dataclass supports the __post_init__ hook
实际应用案例
from dataclasses import dataclass, field
from typing import List, Dict
from datetime import datetime
@dataclass
class Task:
    """A to-do item with a priority, creation timestamp, status and tags."""

    title: str
    description: str
    priority: int = 1
    # default_factory runs at construction time, so every task records its
    # own creation moment.
    created_at: datetime = field(default_factory=datetime.now)
    completed: bool = False
    tags: List[str] = field(default_factory=list)

    def complete(self):
        """Mark the task as completed."""
        self.completed = True

    def add_tag(self, tag: str):
        """Append ``tag`` unless the task already carries it."""
        if tag not in self.tags:
            self.tags.append(tag)
@dataclass
class TaskManager:
    """Holds a list of Task objects and offers simple queries over them."""

    tasks: List[Task] = field(default_factory=list)

    def add_task(self, title: str, description: str, priority: int = 1) -> Task:
        """Create a Task, append it to ``tasks`` and return it."""
        task = Task(title, description, priority)
        self.tasks.append(task)
        return task

    def get_pending_tasks(self) -> List[Task]:
        """Return the tasks that are not completed, in insertion order."""
        return [t for t in self.tasks if not t.completed]

    def get_tasks_by_priority(self, priority: int) -> List[Task]:
        """Return the tasks whose priority equals ``priority``."""
        return [t for t in self.tasks if t.priority == priority]


# Usage
manager = TaskManager()
manager.add_task("学习Python", "完成dataclass章节", 1)
manager.add_task("写代码", "实现数据类", 2)
pending = manager.get_pending_tasks()
for task in pending:
    print(f"{task.title} (优先级: {task.priority})")
性能优化
from dataclasses import dataclass, field
# 使用slots=True提高内存效率(Python 3.10+)
@dataclass(slots=True)
class EfficientPoint:
x: float
y: float
# 这会创建__slots__,减少内存使用
最佳实践
- 对于简单的数据容器,优先使用dataclass
- 需要不可变性时使用frozen=True
- 复杂的验证逻辑放在__post_init__中
- 合理使用field()配置字段属性
- 考虑使用slots=True优化性能
dataclass大大简化了Python中的数据类定义,是现代Python开发的重要工具。