itertools模块
itertools模块概述
itertools模块提供了用于操作迭代器的函数集合,可以高效地处理数据序列。
chain - 链接迭代器
chain可以将多个可迭代对象连接成一个。
from itertools import chain
# Basic usage: three separate lists that will be walked end to end.
list1, list2, list3 = [1, 2, 3], [4, 5, 6], [7, 8, 9]
# chain() yields every element of each argument in turn; unpack it into a list.
combined = [*chain(list1, list2, list3)]
print(combined)  # Output: [1, 2, 3, 4, 5, 6, 7, 8, 9]
# Flatten a list of strings into their individual characters.
words = ['Hello', 'World']
result = [*chain.from_iterable(words)]
print(result)  # Output: ['H', 'e', 'l', 'l', 'o', 'W', 'o', 'r', 'l', 'd']
# Practical use: merge dictionaries by chaining their (key, value) pairs.
dict1 = {'a': 1, 'b': 2}
dict2 = {'c': 3, 'd': 4}
all_pairs = chain(dict1.items(), dict2.items())
merged = dict(all_pairs)
print(merged)  # Output: {'a': 1, 'b': 2, 'c': 3, 'd': 4}
product - 笛卡尔积
product计算多个可迭代对象的笛卡尔积。
from itertools import product
# Basic usage: two independent option lists.
colors = ['red', 'blue']
sizes = ['S', 'M', 'L']
# Every (color, size) pairing.
combinations = [*product(colors, sizes)]
print(combinations)
# Output: [('red', 'S'), ('red', 'M'), ('red', 'L'), ('blue', 'S'), ('blue', 'M'), ('blue', 'L')]
# The repeat argument multiplies one iterable with itself that many times.
digits = [0, 1]
binary = [*product(digits, repeat=3)]
print(binary)
# Output: [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1)]
# Practical use: enumerate a password search space.
import string
chars = ''.join((string.ascii_lowercase, string.digits))
# Every 3-character password over lowercase letters and digits.
passwords = [*product(chars, repeat=3)]
print(f"3位密码组合数: {len(passwords)}")
combinations - 组合
combinations返回可迭代对象中所有长度为r的组合。
from itertools import combinations
# Basic usage
items = ['A', 'B', 'C', 'D']
# All 2-element combinations: order inside a pair is irrelevant, so
# ('A', 'B') is produced but ('B', 'A') is not.
combos = list(combinations(items, 2))
print(combos)
# Output: [('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('C', 'D')]
# Practical use: candidate pairings for a vote
candidates = ['Alice', 'Bob', 'Charlie']
# Choose 2 of the candidates
selected = list(combinations(candidates, 2))
print("候选组合:")
for combo in selected:
    print(f" {combo}")
# Combinations in which the same element may be picked more than once
from itertools import combinations_with_replacement
combos_rep = list(combinations_with_replacement([1, 2, 3], 2))
print(combos_rep)
# Output: [(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
permutations - 排列
permutations返回可迭代对象中所有长度为r的排列。
from itertools import permutations
# Basic usage
items = ['A', 'B', 'C']
# All ordered arrangements of 2 elements
perms = list(permutations(items, 2))
print(perms)
# Output: [('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]
# Full-length permutations (r defaults to the length of the input)
all_perms = list(permutations(items))
print(f"总排列数: {len(all_perms)}")  # Output: 总排列数: 6
# Practical use: PIN cracking
import string
# permutations() never reuses an element, so this enumerates only the 4-digit
# PINs whose digits are all different (10 * 9 * 8 * 7 = 5040), NOT every PIN.
# Covering all 10,000 PINs (e.g. "1123") requires product(digits, repeat=4).
digits = string.digits
pin_perms = list(permutations(digits, 4))
print(f"4位PIN码组合数(数字不重复): {len(pin_perms)}")
其他实用函数
islice - 切片迭代器
from itertools import islice
# Basic usage
data = range(100)
# Take the first 5 elements
first_five = list(islice(data, 5))
print(first_five) # Output: [0, 1, 2, 3, 4]
# Take the elements at indices 2 through 6 (the stop index 7 is exclusive)
sliced = list(islice(data, 2, 7))
print(sliced) # Output: [2, 3, 4, 5, 6]
# Take every second element from index 0 up to (not including) index 10
stepped = list(islice(data, 0, 10, 2))
print(stepped) # Output: [0, 2, 4, 6, 8]
groupby - 分组
from itertools import groupby
# Basic usage: the data is sorted first, using the same key as the grouping,
# so that records with equal grades end up next to each other.
data = [
    {'name': 'Alice', 'grade': 'A'},
    {'name': 'Bob', 'grade': 'B'},
    {'name': 'Charlie', 'grade': 'A'},
    {'name': 'David', 'grade': 'B'},
]
# Group by grade
data.sort(key=lambda x: x['grade'])
for grade, group in groupby(data, key=lambda x: x['grade']):
    print(f"成绩 {grade}:")
    for item in group:
        print(f" {item['name']}")
# Practical use: group dates by month
from datetime import datetime
dates = [
    datetime(2024, 1, 15),
    datetime(2024, 1, 20),
    datetime(2024, 2, 5),
    datetime(2024, 2, 10),
    datetime(2024, 3, 1),
]
# The key is the month number only; that is sufficient here because every
# sample date is in 2024 (dates from different years would share a group).
dates.sort(key=lambda d: d.month)
for month, group in groupby(dates, key=lambda d: d.month):
    print(f"\n月份 {month}:")
    for date in group:
        print(f" {date.strftime('%Y-%m-%d')}")
cycle - 循环迭代
from itertools import cycle
# Basic usage: an endless iterator that repeats the three colors in order.
colors = cycle(['red', 'blue', 'green'])
# Pull exactly six values; range() is listed first so that zip stops before
# drawing a seventh color from the cycle.
result = [color for _, color in zip(range(6), colors)]
print(result)  # Output: ['red', 'blue', 'green', 'red', 'blue', 'green']
# Practical use: hand out tasks to workers in rotation.
tasks = ['任务1', '任务2', '任务3']
workers = cycle(['Alice', 'Bob', 'Charlie'])
# One worker is drawn from the rotation per task.
assignments = [(task, next(workers)) for task in tasks]
print(assignments)
# Output: [('任务1', 'Alice'), ('任务2', 'Bob'), ('任务3', 'Charlie')]
实际应用案例
1. 数据分析
from itertools import combinations
from collections import Counter
def find_frequent_pairs(transactions, top_n=5):
    """Find the item pairs that most often appear in the same transaction.

    Args:
        transactions: Iterable of transactions, each an iterable of items.
            Items must be mutually comparable because every pair is sorted
            to normalize it.
        top_n: Number of most frequent pairs to return (default 5).

    Returns:
        A list of ``(pair, count)`` tuples, most frequent first, where each
        pair is a sorted 2-tuple of items.
    """
    pair_count = Counter()
    for transaction in transactions:
        for pair in combinations(transaction, 2):
            # Sort so that (a, b) and (b, a) are counted as the same pair.
            pair_count[tuple(sorted(pair))] += 1
    return pair_count.most_common(top_n)
# Usage
transactions = [
    ['牛奶', '面包'],
    ['牛奶', '尿布'],
    ['面包', '尿布'],
    ['牛奶', '面包', '尿布'],
    ['牛奶', '面包'],
]
frequent = find_frequent_pairs(transactions)
print("频繁购买的商品对:")
for pair, count in frequent:
    print(f" {pair}: {count}次")
2. 生成测试用例
from itertools import product
def generate_test_cases():
    """Generate arithmetic test cases for every operand pair and operator.

    Returns:
        A list of ``(a, operation, b)`` tuples covering each ordered pair of
        operands drawn from ``[0, 1, -1]`` combined with each of the
        operators ``'+'``, ``'-'`` and ``'*'`` (9 pairs x 3 operators = 27).
    """
    inputs = [0, 1, -1]
    operations = ['+', '-', '*']
    # product(inputs, repeat=2) yields 2-tuples, so unpack exactly two
    # operands; the operator comes from the inner loop.
    return [
        (a, operation, b)
        for a, b in product(inputs, repeat=2)
        for operation in operations
    ]
# Usage
test_cases = generate_test_cases()
print(f"生成了{len(test_cases)}个测试用例")
# Print only the first five cases as a sample.
for i, (a, op, b) in enumerate(test_cases[:5]):
    print(f" 测试{i+1}: {a} {op} {b}")
3. 排列组合分析
from itertools import permutations, combinations
def analyze_permutations(items):
    """Print permutation and combination statistics for a sequence.

    Prints the element count, the number of full-length permutations, the
    number of 2-element combinations, and then every permutation on its own
    numbered line.

    Args:
        items: A sized sequence (``len()`` is called on it). Elements are
            converted with ``str()`` when a permutation is displayed.

    Returns:
        None. All results are written to standard output.
    """
    n = len(items)
    # Count lazily instead of building throwaway lists just to measure them.
    perm_count = sum(1 for _ in permutations(items))
    combo_count = sum(1 for _ in combinations(items, 2))
    print(f"元素数: {n}")
    print(f"排列数: {perm_count}")
    print(f"2元素组合数: {combo_count}")
    # Show every permutation
    print("\n所有排列:")
    for i, perm in enumerate(permutations(items), 1):
        # map(str, ...) keeps the join working for non-string elements.
        print(f" {i}. {''.join(map(str, perm))}")
# Usage: print the statistics and all permutations of three letters
analyze_permutations(['A', 'B', 'C'])
性能优势
import time
from itertools import chain
# Build the inputs before any timing starts so that list construction is not
# charged to either approach.
list1 = list(range(1000000))
list2 = list(range(1000000))
# Using itertools
start = time.perf_counter()  # monotonic, high-resolution clock for intervals
combined = list(chain(list1, list2))
itertools_time = time.perf_counter() - start
# Without itertools
start = time.perf_counter()
combined = list1 + list2
normal_time = time.perf_counter() - start
# NOTE: both variants materialize the full result here; chain() itself is
# lazy and only avoids building the combined list when it is iterated directly.
print(f"itertools: {itertools_time:.4f}秒")
print(f"普通方法: {normal_time:.4f}秒")
最佳实践
- 使用chain连接多个可迭代对象
- 使用product生成笛卡尔积
- 使用combinations和permutations处理组合数学问题
- 使用groupby前先排序
- 注意内存使用,itertools函数返回迭代器
itertools模块是Python中处理迭代器的强大工具,掌握它们能让你的代码更加高效和优雅。