← 返回首页
🐍

数据类

📂 python ⏱ 3 min 478 words

什么是数据类

Python 3.7引入了dataclass装饰器,用于自动创建__init__、__repr__、__eq__等特殊方法,简化类的定义。

from dataclasses import dataclass

@dataclass
class Person:
    """Plain record describing a person.

    The ``@dataclass`` decorator generates ``__init__``, ``__repr__`` and
    ``__eq__`` from the annotated fields below, in declaration order.
    """
    name: str
    age: int
    city: str

# The decorator generated the following methods automatically:
# __init__(self, name, age, city)
# __repr__(self)
# __eq__(self, other)

p1 = Person("Alice", 25, "北京")
p2 = Person("Bob", 30, "上海")
print(p1)  # Output: Person(name='Alice', age=25, city='北京')
# The generated __eq__ compares field by field; every field differs here.
print(p1 == p2)  # Output: False

字段配置

使用field()函数可以配置字段的默认值、默认工厂函数等。

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class Student:
    """Student record demonstrating per-field configuration with ``field()``.

    ``name`` and ``age`` are required; the remaining fields have defaults.
    """
    name: str
    age: int
    # default_factory builds a fresh list per instance instead of sharing one.
    grades: List[float] = field(default_factory=list)
    # Optional because the default is None, not a string.
    email: Optional[str] = field(default=None)
    is_active: bool = field(default=True)

# Usage
# s1 supplies only the required fields, so grades, email and is_active take their defaults.
s1 = Student("Alice", 20)
# s2 passes grades and email positionally, in field declaration order.
s2 = Student("Bob", 22, [90.0, 85.0], "bob@example.com")

print(s1)  # Output: Student(name='Alice', age=20, grades=[], email=None, is_active=True)
print(s2)  # Output: Student(name='Bob', age=22, grades=[90.0, 85.0], email='bob@example.com', is_active=True)

不可变数据类

使用frozen=True创建不可变的数据类。

@dataclass(frozen=True)
class Point:
    """Immutable 2-D point.

    ``frozen=True`` makes field assignment raise ``FrozenInstanceError``.
    Combined with the default ``eq=True`` it also gives the class a
    ``__hash__``, so instances can be used as dict keys.
    """
    x: int
    y: int

p1 = Point(1, 2)
p2 = Point(3, 4)

# Instances cannot be modified
# p1.x = 10  # would raise FrozenInstanceError

# Instances can be used as dictionary keys
points = {p1: "point1", p2: "point2"}
# A new Point with equal field values is equal to p1, so it finds the same entry.
print(points[Point(1, 2)])  # Output: point1

排序和比较

from dataclasses import dataclass, field
from typing import List

@dataclass(order=True)
class Student:
    """Student record whose generated ordering sorts by age first.

    ``order=True`` compares instances as tuples of their comparable fields in
    declaration order. ``sort_index`` is therefore declared first and mirrors
    ``age``; ``name`` then acts as the tie-breaker. ``grades`` is excluded
    from comparisons via ``compare=False``.
    """
    # Derived sort key: not a constructor argument and hidden from repr.
    sort_index: int = field(init=False, repr=False)
    name: str
    age: int
    grades: List[float] = field(default_factory=list, compare=False)

    def __post_init__(self):
        # Snapshot of age taken at construction time; it is not refreshed if
        # age is reassigned afterwards.
        self.sort_index = self.age

# Sort by age
students = [
    Student("Alice", 25),
    Student("Bob", 20),
    Student("Charlie", 22)
]

# sorted() relies on the comparison methods generated by order=True.
sorted_students = sorted(students)
for s in sorted_students:
    print(f"{s.name}: {s.age}")
# Output:
# Bob: 20
# Charlie: 22
# Alice: 25

继承

数据类支持继承,但有一些限制:生成的__init__中父类字段排在前面,因此如果父类中已有带默认值的字段,子类新增的字段也必须提供默认值。

from dataclasses import dataclass, field
from typing import List

@dataclass
class Employee:
    """Base employee record; ``department`` falls back to "General" when omitted."""
    name: str
    salary: float
    department: str = "General"

@dataclass
class Manager(Employee):
    """Employee extended with team information.

    Inherited fields come first in the generated ``__init__``. Because
    ``Employee.department`` already has a default, every field added here
    must have a default as well.
    """
    team_size: int = 0
    # default_factory gives each Manager its own list.
    reports: List[str] = field(default_factory=list)

# Usage
# Positional arguments follow field order: Employee's fields first, then Manager's.
m = Manager("Alice", 50000, "Engineering", 5, ["Bob", "Charlie"])
print(m)  # Output: Manager(name='Alice', salary=50000, department='Engineering', team_size=5, reports=['Bob', 'Charlie'])

后处理(__post_init__)

使用__post_init__方法进行初始化后的验证或转换。

from dataclasses import dataclass
import math

@dataclass
class Circle:
    """Circle described by its radius, validated right after construction."""
    radius: float

    def __post_init__(self):
        """Reject a negative radius once the generated __init__ has run."""
        is_negative = self.radius < 0
        if is_negative:
            raise ValueError("半径不能为负数")

    @property
    def area(self) -> float:
        """Area of the circle: pi times the radius squared."""
        r = self.radius
        return math.pi * r ** 2

    @property
    def circumference(self) -> float:
        """Perimeter of the circle; math.tau is exactly 2 * math.pi."""
        return math.tau * self.radius

# Usage
c = Circle(5)
# area and circumference are properties, so they are read without parentheses.
print(f"面积: {c.area:.2f}")  # Output: 面积: 78.54
print(f"周长: {c.circumference:.2f}")  # Output: 周长: 31.42

dataclass vs namedtuple

from collections import namedtuple
from dataclasses import dataclass

# namedtuple approach: a tuple subclass with fields named x and y
PointTuple = namedtuple('PointTuple', ['x', 'y'])
p1 = PointTuple(1, 2)

# dataclass approach
@dataclass
class PointDataclass:
    """Dataclass counterpart of PointTuple: two annotated int fields."""
    x: int
    y: int

p2 = PointDataclass(1, 2)

# 主要区别:
# 1. namedtuple不可变,dataclass默认可变(除非frozen=True)
# 2. dataclass用类型注解声明字段,并可通过field()配置默认值和默认工厂;collections.namedtuple只能通过defaults参数设置默认值
# 3. dataclass的子类可以通过继承追加新字段,namedtuple不支持这样扩展字段
# 4. dataclass支持__post_init__钩子

实际应用案例

from dataclasses import dataclass, field
from typing import List, Dict
from datetime import datetime

@dataclass
class Task:
    """A single to-do item with a priority, creation time, status and tags."""
    title: str
    description: str
    priority: int = 1
    # Evaluated per instance, so each task records its own creation time.
    created_at: datetime = field(default_factory=datetime.now)
    completed: bool = False
    tags: List[str] = field(default_factory=list)

    def complete(self):
        """Flag this task as done."""
        self.completed = True

    def add_tag(self, tag: str):
        """Attach ``tag`` to the task; a tag already present is left alone."""
        already_tagged = tag in self.tags
        if already_tagged:
            return
        self.tags.append(tag)

@dataclass
class TaskManager:
    """Container for Task objects with simple creation and query helpers."""
    tasks: List[Task] = field(default_factory=list)

    def _select(self, keep):
        """Return the managed tasks for which ``keep(task)`` is truthy, in order."""
        return [candidate for candidate in self.tasks if keep(candidate)]

    def add_task(self, title: str, description: str, priority: int = 1) -> Task:
        """Create a Task, store it at the end of ``tasks`` and return it."""
        created = Task(title, description, priority)
        self.tasks.append(created)
        return created

    def get_pending_tasks(self) -> List[Task]:
        """Return the tasks that are not completed yet."""
        return self._select(lambda candidate: not candidate.completed)

    def get_tasks_by_priority(self, priority: int) -> List[Task]:
        """Return the tasks whose priority equals ``priority``."""
        return self._select(lambda candidate: candidate.priority == priority)

# Usage
manager = TaskManager()
manager.add_task("学习Python", "完成dataclass章节", 1)
manager.add_task("写代码", "实现数据类", 2)

# Neither task has been completed, so both are listed.
pending = manager.get_pending_tasks()
for task in pending:
    print(f"{task.title} (优先级: {task.priority})")

性能优化

from dataclasses import dataclass, field

# slots=True improves memory efficiency (requires Python 3.10+)
@dataclass(slots=True)
class EfficientPoint:
    x: float
    y: float

# This generates __slots__ for the class, which reduces memory usage

最佳实践

dataclass大大简化了Python中的数据类定义,是现代Python开发的重要工具。