迭代器协议深入
迭代器协议深入
迭代器是Python中实现序列访问的核心机制。理解迭代器协议能让你创建自定义的数据结构,并充分利用 itertools 模块的强大功能。
迭代器协议
Python的迭代器协议包含两个方法:
- __iter__():返回迭代器对象本身
- __next__():返回下一个元素,没有更多元素时抛出 StopIteration
class CountDown:
    """Iterator that counts down from ``start`` to 1.

    The object is its own iterator, so it can be traversed only once:
    after it is exhausted, every further ``next()`` raises ``StopIteration``.
    """

    def __init__(self, start):
        # Next value to hand out; a value <= 0 means "already exhausted".
        self.current = start

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the current value and step the counter down by one.

        Raises:
            StopIteration: When the counter has reached 0 or below.
        """
        if self.current <= 0:
            raise StopIteration
        value = self.current
        self.current -= 1
        return value
# Use the custom iterator: the for loop calls iter() once, then next()
# repeatedly until StopIteration is raised.
for num in CountDown(5):
    print(num, end=" ")  # 5 4 3 2 1
可迭代对象 vs 迭代器
# Iterable: implements __iter__ only
class MyRange:
    """Iterable that hands out a new ``MyRangeIterator`` on every traversal.

    Because ``__iter__`` builds a fresh iterator from the stored bounds each
    time, the same ``MyRange`` object can be looped over repeatedly.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __iter__(self):
        """Return a new iterator positioned at ``start``."""
        # The traversal position lives in the iterator, not in this object.
        return MyRangeIterator(self.start, self.end)
# Iterator: implements both __iter__ and __next__
class MyRangeIterator:
    """Iterator yielding ``start``, ``start + 1``, ... up to but excluding ``end``."""

    def __init__(self, start, end):
        # Next value to return.
        self.current = start
        # Exclusive upper bound.
        self.end = end

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the current value and advance by one.

        Raises:
            StopIteration: When ``current`` has reached ``end``.
        """
        if self.current >= self.end:
            raise StopIteration
        value = self.current
        self.current += 1
        return value
无限迭代器
itertools 提供了多种无限迭代器:
import itertools
# count: endless arithmetic progression, here starting at 10 with step 2
counter = itertools.count(10, 2)
print(list(itertools.islice(counter, 5)))  # [10, 12, 14, 16, 18]

# cycle: replays its input endlessly
cycler = itertools.cycle(("A", "B", "C"))
print(list(itertools.islice(cycler, 7)))  # ['A', 'B', 'C', 'A', 'B', 'C', 'A']

# repeat: the same value over and over; `times` caps it (endless if omitted)
repeater = itertools.repeat("hello", times=3)
print([*repeater])  # ['hello', 'hello', 'hello']
itertools 常用函数
import itertools
# chain: iterate several iterables back to back
list1 = [1, 2, 3]
list2 = [4, 5, 6]
combined = list(itertools.chain(list1, list2))
print(combined)  # [1, 2, 3, 4, 5, 6]

# islice: lazy slicing by position (start=5, stop=15, step=2)
nums = itertools.count(1)
sliced = list(itertools.islice(nums, 5, 15, 2))
print(sliced)  # [6, 8, 10, 12, 14]

# groupby: group consecutive items that share a key
data = [('A', 1), ('A', 2), ('B', 3), ('B', 4), ('A', 5)]
# groupby only merges adjacent items with equal keys, so sort by that key first.
data.sort(key=lambda x: x[0])
for key, group in itertools.groupby(data, key=lambda x: x[0]):
    # Each group is a lazy sub-iterator; materialize it before printing.
    print(f"{key}: {list(group)}")
# A: [('A', 1), ('A', 2), ('A', 5)]
# B: [('B', 3), ('B', 4)]

# permutations: ordered selections of length 2
perms = list(itertools.permutations([1, 2, 3], 2))
print(perms)  # [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]

# combinations: unordered selections of length 2
combos = list(itertools.combinations([1, 2, 3, 4], 2))
print(combos)  # [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]
自定义无限迭代器
class Fibonacci:
    """Endless iterator over the Fibonacci sequence 0, 1, 1, 2, 3, ...

    ``__next__`` never raises ``StopIteration``, so callers must bound the
    iteration themselves (for example with ``itertools.islice``).
    """

    def __init__(self):
        # a is the next value to return; b is the one after it.
        self.a = 0
        self.b = 1

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the next Fibonacci number and advance the pair (a, b)."""
        result = self.a
        # Tuple assignment evaluates the right-hand side first, so the old
        # values of a and b are both used to compute the new pair.
        self.a, self.b = self.b, self.a + self.b
        return result
# Fibonacci never stops on its own, so take a finite prefix with islice.
fib = Fibonacci()
first_10 = [*itertools.islice(fib, 10)]
print(first_10)  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
实战:分块处理
def chunked(iterable, size):
    """Yield successive lists of at most ``size`` items from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily, one chunk at a time.
        size: Maximum number of items per chunk; must be at least 1.

    Yields:
        Lists holding ``size`` items each, except the last one, which may be
        shorter. Nothing is yielded for an empty iterable.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator
            function, the error surfaces when iteration starts.
    """
    # size == 0 would otherwise end the generator silently with no output.
    if size < 1:
        raise ValueError("size must be at least 1")
    # One shared iterator, so each islice call resumes where the last stopped.
    iterator = iter(iterable)
    while True:
        chunk = list(itertools.islice(iterator, size))
        if not chunk:
            # An empty chunk means the source is exhausted.
            break
        yield chunk
# Usage example: 25 items in chunks of 10 give chunks of 10, 10 and 5 items
data = range(25)
for i, chunk in enumerate(chunked(data, 10)):
    print(f"第{i+1}块: {chunk}")
迭代器协议是Python高效处理数据的基础,掌握它能让你写出更Pythonic的代码。