← 返回首页

算法优化

📂 python ⏱ 5 min 841 words

算法优化

算法优化是提高程序性能的关键。理解时间复杂度、空间复杂度以及合适的数据结构选择,能让你写出更高效的代码。

时间复杂度分析

时间复杂度描述算法执行时间随输入规模增长的趋势。

import time

def constant_time(n):
    """O(1) - 常数时间"""
    return n * 2

def linear_time(n):
    """O(n) - 线性时间"""
    total = 0
    for i in range(n):
        total += i
    return total

def quadratic_time(n):
    """O(n²) - 平方时间"""
    count = 0
    for i in range(n):
        for j in range(n):
            count += 1
    return count

def logarithmic_time(n):
    """O(log n) - 对数时间"""
    count = 0
    while n > 1:
        n //= 2
        count += 1
    return count

# 测试不同时间复杂度
sizes = [1000, 10000, 100000]

for size in sizes:
    start = time.time()
    constant_time(size)
    constant_time = time.time() - start
    
    start = time.time()
    linear_time(size)
    linear_time = time.time() - start
    
    start = time.time()
    quadratic_time(size // 100)  # 减小规模避免等待
    quadratic_time = time.time() - start
    
    start = time.time()
    logarithmic_time(size)
    logarithmic_time = time.time() - start
    
    print(f"n={size}:")
    print(f"  O(1):     {constant_time:.6f}秒")
    print(f"  O(n):     {linear_time:.6f}秒")
    print(f"  O(n²):    {quadratic_time:.6f}秒")
    print(f"  O(log n): {logarithmic_time:.6f}秒")

空间复杂度

空间复杂度描述算法使用的额外内存空间。

import sys

def list_vs_generator(n):
    """列表vs生成器的空间对比"""
    # 列表 - 占用O(n)空间
    list_comp = [i ** 2 for i in range(n)]
    
    # 生成器 - 占用O(1)空间
    gen_exp = (i ** 2 for i in range(n))
    
    return list_comp, gen_exp

# 空间对比
n = 1000000
list_data, gen_data = list_vs_generator(n)

print(f"列表占用内存: {sys.getsizeof(list_data)} 字节")
print(f"生成器占用内存: {sys.getsizeof(gen_data)} 字节")

原地操作

import sys

def not_inplace(data):
    """非原地操作"""
    return sorted(data)  # 创建新列表

def inplace(data):
    """原地操作"""
    data.sort()  # 修改原列表
    return data

# 空间对比
data1 = list(range(1000000))
data2 = list(range(1000000))

print(f"原始列表内存: {sys.getsizeof(data1)} 字节")

sorted1 = not_inplace(data1)
print(f"sorted()后新列表内存: {sys.getsizeof(sorted1)} 字节")
print(f"原始列表内存: {sys.getsizeof(data1)} 字节")  # 不变

inplace(data2)
print(f"sort()后原列表内存: {sys.getsizeof(data2)} 字节")

缓存策略

lru_cache装饰器

from functools import lru_cache
import time

@lru_cache(maxsize=128)
def fibonacci_cached(n):
    """带缓存的斐波那契数列"""
    if n < 2:
        return n
    return fibonacci_cached(n-1) + fibonacci_cached(n-2)

def fibonacci_plain(n):
    """普通斐波那契数列"""
    if n < 2:
        return n
    return fibonacci_plain(n-1) + fibonacci_plain(n-2)

# 性能对比
n = 35

start = time.time()
result1 = fibonacci_plain(n)
time1 = time.time() - start

start = time.time()
result2 = fibonacci_cached(n)
time2 = time.time() - start

print(f"结果: {result1} = {result2}")
print(f"普通递归: {time1:.4f}秒")
print(f"带缓存递归: {time2:.6f}秒")
print(f"加速比: {time1/time2:.0f}倍")

自定义缓存

import time
from collections import OrderedDict

class LRUCache:
    """LRU缓存实现"""
    
    def __init__(self, capacity: int):
        self.capacity = capacity
        self.cache = OrderedDict()
    
    def get(self, key):
        if key in self.cache:
            self.cache.move_to_end(key)
            return self.cache[key]
        return -1
    
    def put(self, key, value):
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            self.cache.popitem(last=False)
    
    def __repr__(self):
        return f"LRUCache({dict(self.cache)})"

# 使用示例
cache = LRUCache(3)

cache.put(1, "a")
cache.put(2, "b")
cache.put(3, "c")
print(cache)  # {1: 'a', 2: 'b', 3: 'c'}

cache.get(1)  # 访问key 1,使其成为最近使用
cache.put(4, "d")  # 添加key 4,key 2被淘汰
print(cache)  # {3: 'c', 1: 'a', 4: 'd'}

记忆化装饰器

import time
from functools import wraps

def memoize(func):
    """记忆化装饰器"""
    cache = {}
    
    @wraps(func)
    def wrapper(*args):
        if args in cache:
            return cache[args]
        result = func(*args)
        cache[args] = result
        return result
    
    wrapper.cache_info = lambda: f"缓存大小: {len(cache)}"
    wrapper.cache_clear = lambda: cache.clear()
    
    return wrapper

@memoize
def expensive_calculation(n):
    """耗时计算"""
    time.sleep(0.1)  # 模拟耗时
    return n ** 2

# 使用
start = time.time()
result1 = expensive_calculation(100)
time1 = time.time() - start

start = time.time()
result2 = expensive_calculation(100)
time2 = time.time() - start

print(f"第一次计算: {time1:.4f}秒")
print(f"第二次计算: {time2:.6f}秒")
print(expensive_calculation.cache_info())

数据结构选择

列表vs集合vs字典

import time

# 查找性能对比
data = list(range(100000))
target = 99999

# 列表查找
start = time.time()
for _ in range(1000):
    _ = target in data
list_time = time.time() - start

# 集合查找
data_set = set(data)
start = time.time()
for _ in range(1000):
    _ = target in data_set
set_time = time.time() - start

# 字典查找
data_dict = {x: x for x in data}
start = time.time()
for _ in range(1000):
    _ = target in data_dict
dict_time = time.time() - start

print(f"列表查找: {list_time:.4f}秒")
print(f"集合查找: {set_time:.6f}秒")
print(f"字典查找: {dict_time:.6f}秒")

deque vs list

from collections import deque
import time

# 在头部操作性能对比
size = 100000

# 列表头部插入
list_data = []
start = time.time()
for i in range(size):
    list_data.insert(0, i)
list_time = time.time() - start

# deque头部插入
deque_data = deque()
start = time.time()
for i in range(size):
    deque_data.appendleft(i)
deque_time = time.time() - start

print(f"列表头部插入: {list_time:.4f}秒")
print(f"deque头部插入: {deque_time:.6f}秒")
print(f"deque快 {list_time/deque_time:.0f} 倍")

defaultdict vs setdefault

from collections import defaultdict
import time

# 分组操作性能对比
data = [(i % 10, i) for i in range(100000)]

# 使用setdefault
start = time.time()
result1 = {}
for key, value in data:
    result1.setdefault(key, []).append(value)
setdefault_time = time.time() - start

# 使用defaultdict
start = time.time()
result2 = defaultdict(list)
for key, value in data:
    result2[key].append(value)
defaultdict_time = time.time() - start

print(f"setdefault: {setdefault_time:.4f}秒")
print(f"defaultdict: {defaultdict_time:.4f}秒")

常见算法优化技巧

双指针技术

def two_sum_sorted(nums, target):
    """有序数组两数之和 - O(n)"""
    left, right = 0, len(nums) - 1
    
    while left < right:
        current_sum = nums[left] + nums[right]
        if current_sum == target:
            return left, right
        elif current_sum < target:
            left += 1
        else:
            right -= 1
    
    return -1, -1

# 示例
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9]
target = 10
print(two_sum_sorted(nums, target))  # (0, 8)

滑动窗口

def max_subarray_sum(nums, k):
    """最大子数组和 - O(n)"""
    if len(nums) < k:
        return 0
    
    # 计算第一个窗口
    window_sum = sum(nums[:k])
    max_sum = window_sum
    
    # 滑动窗口
    for i in range(k, len(nums)):
        window_sum = window_sum - nums[i - k] + nums[i]
        max_sum = max(max_sum, window_sum)
    
    return max_sum

# 示例
nums = [1, 4, 2, 10, 23, 3, 1, 0, 20]
k = 4
print(max_subarray_sum(nums, k))  # 39

二分查找

def binary_search(arr, target):
    """二分查找 - O(log n)"""
    left, right = 0, len(arr) - 1
    
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    
    return -1

# 示例
arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
target = 7
print(binary_search(arr, target))  # 6

优化原则

  1. 先分析后优化:使用性能分析工具找到真正的瓶颈
  2. 选择合适的算法:根据问题规模选择合适的算法复杂度
  3. 选择合适的数据结构:根据操作类型选择高效的数据结构
  4. 利用缓存:对重复计算使用记忆化
  5. 避免不必要的操作:减少循环次数,避免重复计算
  6. 使用内置函数:Python内置函数通常比手动实现更快
  7. 考虑空间换时间:在内存允许的情况下,使用缓存提高速度

算法优化是程序性能优化的核心,掌握这些技巧能让你写出高效的Python代码。