算法优化
算法优化
算法优化是提高程序性能的关键。理解时间复杂度、空间复杂度以及合适的数据结构选择,能让你写出更高效的代码。
时间复杂度分析
时间复杂度描述算法执行时间随输入规模增长的趋势。
import time
def constant_time(n):
"""O(1) - 常数时间"""
return n * 2
def linear_time(n):
"""O(n) - 线性时间"""
total = 0
for i in range(n):
total += i
return total
def quadratic_time(n):
"""O(n²) - 平方时间"""
count = 0
for i in range(n):
for j in range(n):
count += 1
return count
def logarithmic_time(n):
"""O(log n) - 对数时间"""
count = 0
while n > 1:
n //= 2
count += 1
return count
# 测试不同时间复杂度
sizes = [1000, 10000, 100000]
for size in sizes:
start = time.time()
constant_time(size)
constant_time = time.time() - start
start = time.time()
linear_time(size)
linear_time = time.time() - start
start = time.time()
quadratic_time(size // 100) # 减小规模避免等待
quadratic_time = time.time() - start
start = time.time()
logarithmic_time(size)
logarithmic_time = time.time() - start
print(f"n={size}:")
print(f" O(1): {constant_time:.6f}秒")
print(f" O(n): {linear_time:.6f}秒")
print(f" O(n²): {quadratic_time:.6f}秒")
print(f" O(log n): {logarithmic_time:.6f}秒")
空间复杂度
空间复杂度描述算法使用的额外内存空间。
import sys
def list_vs_generator(n):
"""列表vs生成器的空间对比"""
# 列表 - 占用O(n)空间
list_comp = [i ** 2 for i in range(n)]
# 生成器 - 占用O(1)空间
gen_exp = (i ** 2 for i in range(n))
return list_comp, gen_exp
# 空间对比
n = 1000000
list_data, gen_data = list_vs_generator(n)
print(f"列表占用内存: {sys.getsizeof(list_data)} 字节")
print(f"生成器占用内存: {sys.getsizeof(gen_data)} 字节")
原地操作
import sys
def not_inplace(data):
"""非原地操作"""
return sorted(data) # 创建新列表
def inplace(data):
"""原地操作"""
data.sort() # 修改原列表
return data
# 空间对比
data1 = list(range(1000000))
data2 = list(range(1000000))
print(f"原始列表内存: {sys.getsizeof(data1)} 字节")
sorted1 = not_inplace(data1)
print(f"sorted()后新列表内存: {sys.getsizeof(sorted1)} 字节")
print(f"原始列表内存: {sys.getsizeof(data1)} 字节") # 不变
inplace(data2)
print(f"sort()后原列表内存: {sys.getsizeof(data2)} 字节")
缓存策略
lru_cache装饰器
from functools import lru_cache
import time
@lru_cache(maxsize=128)
def fibonacci_cached(n):
"""带缓存的斐波那契数列"""
if n < 2:
return n
return fibonacci_cached(n-1) + fibonacci_cached(n-2)
def fibonacci_plain(n):
"""普通斐波那契数列"""
if n < 2:
return n
return fibonacci_plain(n-1) + fibonacci_plain(n-2)
# 性能对比
n = 35
start = time.time()
result1 = fibonacci_plain(n)
time1 = time.time() - start
start = time.time()
result2 = fibonacci_cached(n)
time2 = time.time() - start
print(f"结果: {result1} = {result2}")
print(f"普通递归: {time1:.4f}秒")
print(f"带缓存递归: {time2:.6f}秒")
print(f"加速比: {time1/time2:.0f}倍")
自定义缓存
import time
from collections import OrderedDict
class LRUCache:
"""LRU缓存实现"""
def __init__(self, capacity: int):
self.capacity = capacity
self.cache = OrderedDict()
def get(self, key):
if key in self.cache:
self.cache.move_to_end(key)
return self.cache[key]
return -1
def put(self, key, value):
if key in self.cache:
self.cache.move_to_end(key)
self.cache[key] = value
if len(self.cache) > self.capacity:
self.cache.popitem(last=False)
def __repr__(self):
return f"LRUCache({dict(self.cache)})"
# 使用示例
cache = LRUCache(3)
cache.put(1, "a")
cache.put(2, "b")
cache.put(3, "c")
print(cache) # {1: 'a', 2: 'b', 3: 'c'}
cache.get(1) # 访问key 1,使其成为最近使用
cache.put(4, "d") # 添加key 4,key 2被淘汰
print(cache) # {3: 'c', 1: 'a', 4: 'd'}
记忆化装饰器
import time
from functools import wraps
def memoize(func):
"""记忆化装饰器"""
cache = {}
@wraps(func)
def wrapper(*args):
if args in cache:
return cache[args]
result = func(*args)
cache[args] = result
return result
wrapper.cache_info = lambda: f"缓存大小: {len(cache)}"
wrapper.cache_clear = lambda: cache.clear()
return wrapper
@memoize
def expensive_calculation(n):
"""耗时计算"""
time.sleep(0.1) # 模拟耗时
return n ** 2
# 使用
start = time.time()
result1 = expensive_calculation(100)
time1 = time.time() - start
start = time.time()
result2 = expensive_calculation(100)
time2 = time.time() - start
print(f"第一次计算: {time1:.4f}秒")
print(f"第二次计算: {time2:.6f}秒")
print(expensive_calculation.cache_info())
数据结构选择
列表vs集合vs字典
import time
# 查找性能对比
data = list(range(100000))
target = 99999
# 列表查找
start = time.time()
for _ in range(1000):
_ = target in data
list_time = time.time() - start
# 集合查找
data_set = set(data)
start = time.time()
for _ in range(1000):
_ = target in data_set
set_time = time.time() - start
# 字典查找
data_dict = {x: x for x in data}
start = time.time()
for _ in range(1000):
_ = target in data_dict
dict_time = time.time() - start
print(f"列表查找: {list_time:.4f}秒")
print(f"集合查找: {set_time:.6f}秒")
print(f"字典查找: {dict_time:.6f}秒")
deque vs list
from collections import deque
import time
# 在头部操作性能对比
size = 100000
# 列表头部插入
list_data = []
start = time.time()
for i in range(size):
list_data.insert(0, i)
list_time = time.time() - start
# deque头部插入
deque_data = deque()
start = time.time()
for i in range(size):
deque_data.appendleft(i)
deque_time = time.time() - start
print(f"列表头部插入: {list_time:.4f}秒")
print(f"deque头部插入: {deque_time:.6f}秒")
print(f"deque快 {list_time/deque_time:.0f} 倍")
defaultdict vs setdefault
from collections import defaultdict
import time
# 分组操作性能对比
data = [(i % 10, i) for i in range(100000)]
# 使用setdefault
start = time.time()
result1 = {}
for key, value in data:
result1.setdefault(key, []).append(value)
setdefault_time = time.time() - start
# 使用defaultdict
start = time.time()
result2 = defaultdict(list)
for key, value in data:
result2[key].append(value)
defaultdict_time = time.time() - start
print(f"setdefault: {setdefault_time:.4f}秒")
print(f"defaultdict: {defaultdict_time:.4f}秒")
常见算法优化技巧
双指针技术
def two_sum_sorted(nums, target):
"""有序数组两数之和 - O(n)"""
left, right = 0, len(nums) - 1
while left < right:
current_sum = nums[left] + nums[right]
if current_sum == target:
return left, right
elif current_sum < target:
left += 1
else:
right -= 1
return -1, -1
# 示例
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9]
target = 10
print(two_sum_sorted(nums, target)) # (0, 8)
滑动窗口
def max_subarray_sum(nums, k):
"""最大子数组和 - O(n)"""
if len(nums) < k:
return 0
# 计算第一个窗口
window_sum = sum(nums[:k])
max_sum = window_sum
# 滑动窗口
for i in range(k, len(nums)):
window_sum = window_sum - nums[i - k] + nums[i]
max_sum = max(max_sum, window_sum)
return max_sum
# 示例
nums = [1, 4, 2, 10, 23, 3, 1, 0, 20]
k = 4
print(max_subarray_sum(nums, k)) # 39
二分查找
def binary_search(arr, target):
"""二分查找 - O(log n)"""
left, right = 0, len(arr) - 1
while left <= right:
mid = (left + right) // 2
if arr[mid] == target:
return mid
elif arr[mid] < target:
left = mid + 1
else:
right = mid - 1
return -1
# 示例
arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
target = 7
print(binary_search(arr, target)) # 6
优化原则
- 先分析后优化:使用性能分析工具找到真正的瓶颈
- 选择合适的算法:根据问题规模选择合适的算法复杂度
- 选择合适的数据结构:根据操作类型选择高效的数据结构
- 利用缓存:对重复计算使用记忆化
- 避免不必要的操作:减少循环次数,避免重复计算
- 使用内置函数:Python内置函数通常比手动实现更快
- 考虑空间换时间:在内存允许的情况下,使用缓存提高速度
算法优化是程序性能优化的核心,掌握这些技巧能让你写出高效的Python代码。