← 返回首页
🔍

Java正则表达式详解:Pattern与Matcher

📂 java ⏱ 5 min 847 words

Java正则表达式详解:Pattern与Matcher

概述

正则表达式(Regular Expression)是用于匹配字符串模式的强大工具。Java提供了java.util.regex包来支持正则表达式操作,包括Pattern和Matcher两个核心类。

1. 正则表达式基础

字符类

public class RegexBasics {
    /**
     * Prints whether a fixed sample string satisfies several character-class patterns.
     *
     * <p>Character classes shown (regex syntax):
     * <ul>
     *   <li>{@code [abc]} - one of a, b or c</li>
     *   <li>{@code [^abc]} - any character except a, b or c</li>
     *   <li>{@code [a-zA-Z]} - an ASCII letter</li>
     *   <li>{@code [0-9]} - an ASCII digit</li>
     *   <li>{@code .} - any character (line terminators excluded by default)</li>
     * </ul>
     *
     * <p>Predefined classes, written as they appear inside a Java string literal:
     * {@code \\d} digit, {@code \\D} non-digit, {@code \\s} whitespace,
     * {@code \\S} non-whitespace, {@code \\w} word character {@code [a-zA-Z0-9_]},
     * {@code \\W} non-word character.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String sample = "Hello World 123";

        // Explicit character classes. Each regex is wrapped in ".*" because
        // String.matches only succeeds when the whole input matches.
        report("匹配字母: ", sample, ".*[a-zA-Z].*");
        report("匹配数字: ", sample, ".*[0-9].*");
        report("匹配空格: ", sample, ".*\\s.*");

        // Predefined character classes.
        report("包含数字: ", sample, ".*\\d.*");
        // \w also accepts digits and underscore, so this check is broader than "letters".
        report("包含字母: ", sample, ".*\\w.*");
    }

    /** Prints {@code label} followed by whether {@code input} matches {@code regex} in full. */
    private static void report(String label, String input, String regex) {
        System.out.println(label + input.matches(regex));
    }
}

量词

public class RegexQuantifiers {
    /**
     * Prints, for each sample string, whether the whole string matches each quantifier pattern.
     *
     * <p>Quantifiers shown:
     * <ul>
     *   <li>{@code *} - zero or more times</li>
     *   <li>{@code +} - one or more times</li>
     *   <li>{@code ?} - zero or one time</li>
     *   <li>{@code {n}} - exactly n times</li>
     *   <li>{@code {n,}} - at least n times</li>
     *   <li>{@code {n,m}} - between n and m times</li>
     * </ul>
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String[] samples = {"abc", "a", "ab", "abcde", "aabbc"};
        // The regex text doubles as the label that is printed next to each result.
        final String[] quantifierRegexes = {"a*", "a+", "a?", "a{2}", "a{2,}", "a{1,3}"};

        for (int i = 0; i < samples.length; i++) {
            String sample = samples[i];
            System.out.println(sample + ": ");
            for (String regex : quantifierRegexes) {
                boolean wholeInputMatches = sample.matches(regex);
                System.out.println("  匹配" + regex + ": " + wholeInputMatches);
            }
        }
    }
}

边界匹配

public class RegexBoundaries {
    /**
     * Prints the result of three boundary-related checks against a fixed sample string.
     *
     * <p>Boundary matchers, written as they appear inside a Java string literal:
     * <ul>
     *   <li>{@code ^} - start of input (start of each line in MULTILINE mode)</li>
     *   <li>{@code $} - end of input (end of each line in MULTILINE mode)</li>
     *   <li>{@code \\b} - word boundary</li>
     *   <li>{@code \\B} - non-word boundary</li>
     * </ul>
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String text = "Hello World";

        // String.matches tests the entire input, so the explicit ^ and $ below are
        // redundant; they are kept to illustrate the anchors.
        System.out.println("以H开头: " + text.matches("^Hello.*"));
        // String concatenation uses '+'; the previous "=>" token is not valid Java.
        System.out.println("以d结尾: " + text.matches(".*d$"));
        System.out.println("包含World: " + text.matches(".*\\bWorld\\b.*"));
    }
}

2. Pattern和Matcher

import java.util.regex.*;

public class PatternMatcherExample {
    /**
     * Walks through the basic {@link Pattern}/{@link Matcher} workflow on a fixed input:
     * iterate over every match, attempt a whole-input match, reset, then fetch the first match.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Compile once; the resulting Pattern is then used to create the matcher.
        final Pattern digitRun = Pattern.compile("\\d+");
        final String input = "abc123def456ghi789";
        final Matcher m = digitRun.matcher(input);

        // Each successful find() advances to the next run of digits.
        for (boolean found = m.find(); found; found = m.find()) {
            String hit = m.group();
            int from = m.start();
            int to = m.end();
            System.out.println("找到: " + hit);
            System.out.println("位置: " + from + "-" + to);
        }

        // matches() requires the entire input to be digits, which it is not here.
        System.out.println("完全匹配: " + m.matches());

        // Rewind the matcher so the next find() starts from the beginning again.
        m.reset();

        boolean hasFirst = m.find();
        if (hasFirst) {
            System.out.println("第一个匹配: " + m.group());
        }
    }
}

分组捕获

import java.util.regex.*;

public class GroupCaptureExample {
    /**
     * Demonstrates numbered and named capturing groups on date strings of the form
     * {@code yyyy-MM-dd}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Numbered groups: 1 = year, 2 = month, 3 = day. Labels are indexed by group - 1.
        final String[] partLabels = {"年: ", "月: ", "日: "};
        final Pattern datePattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
        final Matcher dates = datePattern.matcher("今天是2024-01-15,明天是2024-01-16");

        while (dates.find()) {
            System.out.println("完整匹配: " + dates.group());
            for (int group = 1; group <= partLabels.length; group++) {
                System.out.println(partLabels[group - 1] + dates.group(group));
            }
            System.out.println("开始位置: " + dates.start());
            System.out.println("结束位置: " + dates.end());
            System.out.println();
        }

        // Named groups (available since Java 7): the same parts are retrieved by name.
        final String[] groupNames = {"year", "month", "day"};
        final Pattern namedPattern =
            Pattern.compile("(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})");
        final Matcher named = namedPattern.matcher("2024-01-15");

        if (!named.find()) {
            return;
        }
        for (int i = 0; i < groupNames.length; i++) {
            System.out.println(partLabels[i] + named.group(groupNames[i]));
        }
    }
}

3. 常用正则表达式

import java.util.regex.*;

/**
 * A handful of format validators built on precompiled regular expressions.
 *
 * <p>Each pattern is compiled once as a {@code static final} constant instead of being
 * recompiled on every call through {@code String.matches}. All validators return
 * {@code false} for {@code null} input rather than throwing.
 */
public class CommonRegexPatterns {
    // Local part limited to letters, digits and "+_.-"; anything non-empty is accepted after '@'.
    private static final Pattern EMAIL = Pattern.compile("^[A-Za-z0-9+_.-]+@(.+)$");

    // 11 digits: leading 1, second digit 3-9, then nine more digits.
    private static final Pattern PHONE = Pattern.compile("^1[3-9]\\d{9}$");

    // 17 digits followed by a digit or X/x; only the shape is checked.
    private static final Pattern ID_CARD = Pattern.compile("^\\d{17}[\\dXx]$");

    // http, https or ftp scheme followed by a whitespace-free remainder.
    private static final Pattern URL = Pattern.compile("^(https?|ftp)://[^\\s/$.?#].[^\\s]*$");

    // Four dot-separated octets, each in the range 0-255.
    private static final Pattern IPV4 = Pattern.compile(
        "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$");

    /**
     * Checks that {@code email} has an allowed local part followed by '@' and a non-empty rest.
     * The part after '@' is not validated further.
     *
     * @param email candidate address, may be {@code null}
     * @return {@code true} if the whole string matches the email pattern
     */
    public static boolean isValidEmail(String email) {
        return matchesFully(EMAIL, email);
    }

    /**
     * Checks the shape of a mainland-China mobile number.
     *
     * @param phone candidate number, may be {@code null}
     * @return {@code true} if the string is 11 digits starting with 1 and a second digit 3-9
     */
    public static boolean isValidPhone(String phone) {
        return matchesFully(PHONE, phone);
    }

    /**
     * Checks the shape of an 18-character ID card number. No checksum is computed.
     *
     * @param idCard candidate number, may be {@code null}
     * @return {@code true} if the string is 17 digits followed by a digit, X or x
     */
    public static boolean isValidIdCard(String idCard) {
        return matchesFully(ID_CARD, idCard);
    }

    /**
     * Checks that {@code url} starts with http, https or ftp and contains no whitespace.
     *
     * @param url candidate URL, may be {@code null}
     * @return {@code true} if the whole string matches the URL pattern
     */
    public static boolean isValidURL(String url) {
        return matchesFully(URL, url);
    }

    /**
     * Checks that {@code ip} is a dotted-quad IPv4 address with every octet in 0-255.
     *
     * @param ip candidate address, may be {@code null}
     * @return {@code true} if the whole string matches the IPv4 pattern
     */
    public static boolean isValidIP(String ip) {
        return matchesFully(IPV4, ip);
    }

    /** Returns whether {@code input} is non-null and matches {@code pattern} in its entirety. */
    private static boolean matchesFully(Pattern pattern, String input) {
        return input != null && pattern.matcher(input).matches();
    }

    /**
     * Prints the result of each validator for a valid and an invalid sample.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("邮箱验证:");
        System.out.println("  test@example.com: " + isValidEmail("test@example.com"));
        System.out.println("  invalid-email: " + isValidEmail("invalid-email"));

        System.out.println("\n手机号验证:");
        System.out.println("  13812345678: " + isValidPhone("13812345678"));
        System.out.println("  12345678: " + isValidPhone("12345678"));

        System.out.println("\n身份证号验证:");
        System.out.println("  110101199001011234: " + isValidIdCard("110101199001011234"));

        System.out.println("\nURL验证:");
        System.out.println("  https://www.example.com: " + isValidURL("https://www.example.com"));
        System.out.println("  invalid-url: " + isValidURL("invalid-url"));

        System.out.println("\nIP地址验证:");
        System.out.println("  192.168.1.1: " + isValidIP("192.168.1.1"));
        System.out.println("  999.999.999.999: " + isValidIP("999.999.999.999"));
    }
}

4. 字符串操作

import java.util.regex.*;

public class StringOperations {
    /**
     * Demonstrates common regex-driven string operations on fixed inputs: finding all
     * matches, replacing, splitting, extracting email addresses and validating a format.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String text = "Hello World 123 Java 456";

        // Find every run of digits.
        Pattern digitPattern = Pattern.compile("\\d+");
        Matcher matcher = digitPattern.matcher(text);

        System.out.println("所有数字:");
        while (matcher.find()) {
            System.out.println("  " + matcher.group());
        }

        // Replace: reuse the compiled pattern rather than recompiling via String.replaceAll.
        String replaced = digitPattern.matcher(text).replaceAll("#");
        System.out.println("\n替换数字: " + replaced);

        // Split on runs of whitespace.
        String[] words = text.split("\\s+");
        System.out.println("\n分割单词:");
        for (String word : words) {
            System.out.println("  " + word);
        }

        // Extract email addresses. The domain is restricted to dot-separated labels of
        // letters, digits and hyphens; an unbounded ".+" after '@' would greedily run to
        // the end of the line and merge both addresses into a single match.
        String emailText = "联系我们: info@example.com 或 support@test.org";
        Pattern emailPattern =
            Pattern.compile("[A-Za-z0-9+_.-]+@[A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)+");
        Matcher emailMatcher = emailPattern.matcher(emailText);

        System.out.println("\n提取邮箱:");
        while (emailMatcher.find()) {
            System.out.println("  " + emailMatcher.group());
        }

        // Validate a date's shape (digits and dashes only; no calendar check is done).
        String date = "2024-01-15";
        boolean isValidDate = date.matches("^\\d{4}-\\d{2}-\\d{2}$");
        System.out.println("\n日期格式有效: " + isValidDate);
    }
}

5. 实际应用示例

日志解析器

import java.util.regex.*;
import java.util.*;

/**
 * Parses log lines of the form {@code yyyy-MM-dd HH:mm:ss [LEVEL] message}.
 */
public class LogParser {
    // Group 1 = timestamp, group 2 = level (word characters), group 3 = rest of the line.
    private static final Pattern LOG_PATTERN = Pattern.compile(
        "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) \\[(\\w+)\\] (.*)"
    );

    /**
     * Splits a log line into its timestamp, level and message.
     *
     * @param logLine the line to parse
     * @return a map with the keys {@code timestamp}, {@code level} and {@code message};
     *         empty when the whole line does not match the expected layout
     */
    public static Map<String, String> parseLog(String logLine) {
        Map<String, String> fields = new HashMap<>();
        Matcher m = LOG_PATTERN.matcher(logLine);

        if (!m.matches()) {
            return fields;
        }

        // Key at index i corresponds to capturing group i + 1.
        String[] keys = {"timestamp", "level", "message"};
        for (int i = 0; i < keys.length; i++) {
            fields.put(keys[i], m.group(i + 1));
        }
        return fields;
    }

    /**
     * Parses three sample log lines and prints the extracted fields.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] samples = {
            "2024-01-15 10:30:00 [INFO] Application started",
            "2024-01-15 10:30:05 [ERROR] Database connection failed",
            "2024-01-15 10:30:10 [DEBUG] Processing request"
        };

        for (int i = 0; i < samples.length; i++) {
            String line = samples[i];
            Map<String, String> fields = parseLog(line);
            System.out.println("日志: " + line);
            System.out.println("  时间: " + fields.get("timestamp"));
            System.out.println("  级别: " + fields.get("level"));
            System.out.println("  消息: " + fields.get("message"));
            System.out.println();
        }
    }
}

模板引擎

import java.util.regex.*;
import java.util.*;

/**
 * A minimal template renderer that substitutes {@code {key}} placeholders from a map.
 */
public class SimpleTemplateEngine {
    // A placeholder is a run of word characters wrapped in braces; group 1 is the key.
    private static final Pattern TEMPLATE_PATTERN = Pattern.compile("\\{(\\w+)\\}");

    /**
     * Replaces every {@code {key}} placeholder in {@code template} with the value that
     * {@code data} holds for that key. Placeholders without an entry are left as written.
     *
     * @param template text containing zero or more {@code {key}} placeholders
     * @param data     values to substitute, keyed by placeholder name
     * @return the template with all known placeholders substituted
     */
    public static String render(String template, Map<String, String> data) {
        Matcher placeholders = TEMPLATE_PATTERN.matcher(template);
        StringBuffer rendered = new StringBuffer();

        for (boolean found = placeholders.find(); found; found = placeholders.find()) {
            String substitution = lookup(data, placeholders.group(1), placeholders.group());
            // quoteReplacement keeps '$' and '\' in the value from being read as group references.
            placeholders.appendReplacement(rendered, Matcher.quoteReplacement(substitution));
        }

        return placeholders.appendTail(rendered).toString();
    }

    /** Returns the value stored for {@code key}, or {@code fallback} when there is no entry. */
    private static String lookup(Map<String, String> data, String key, String fallback) {
        return data.getOrDefault(key, fallback);
    }

    /**
     * Renders a sample greeting template and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Map<String, String> values = new HashMap<>();
        values.put("name", "Alice");
        values.put("city", "Beijing");
        values.put("date", "2024-01-15");

        String greeting = "Hello, {name}! Welcome to {city}. Today is {date}.";
        System.out.println("模板渲染结果: " + render(greeting, values));
    }
}

6. 最佳实践

  1. 预编译Pattern:重复使用的正则表达式应该预编译
  2. 注意反斜杠转义:Java没有原始字符串字面量(文本块同样会处理转义),正则中的反斜杠在字符串里必须写成双反斜杠,例如"\\d+"
  3. 测试正则表达式:使用在线工具测试正则表达式
  4. 注意性能:避免使用过于复杂的正则表达式
  5. 文档化模式:清楚地文档化正则表达式的用途

总结

正则表达式是Java中强大的字符串处理工具。掌握正则表达式的语法和使用方法,可以高效地处理各种字符串操作任务。在实际编程中,要根据需求选择合适的正则表达式,并注意性能和可读性。