Java正则表达式详解:Pattern与Matcher
Java正则表达式详解:Pattern与Matcher
概述
正则表达式(Regular Expression)是用于匹配字符串模式的强大工具。Java提供了java.util.regex包来支持正则表达式操作,包括Pattern和Matcher两个核心类。
1. 正则表达式基础
字符类
/**
 * Demonstrates character classes and predefined character classes with
 * {@link String#matches(String)}, which succeeds only when the whole input
 * matches the expression (hence the surrounding {@code .*} in every regex).
 */
public class RegexBasics {

    /** Sample input probed by every check in {@link #main(String[])}. */
    private static final String SAMPLE = "Hello World 123";

    /**
     * Prints one line per check, each consisting of a label followed by the
     * boolean result of matching the sample text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Character classes:
        //   [abc]    - a, b or c
        //   [^abc]   - any character other than a, b or c
        //   [a-zA-Z] - any ASCII letter
        //   [0-9]    - any ASCII digit
        //   .        - any character (except a line break)
        report("匹配字母: ", ".*[a-zA-Z].*");
        report("匹配数字: ", ".*[0-9].*");
        report("匹配空格: ", ".*\\s.*");

        // Predefined character classes (the backslash is doubled inside a
        // Java string literal):
        //   \\d - digit [0-9]
        //   \\D - non-digit [^0-9]
        //   \\s - whitespace character
        //   \\S - non-whitespace character
        //   \\w - word character [a-zA-Z0-9_]
        //   \\W - non-word character
        report("包含数字: ", ".*\\d.*");
        report("包含字母: ", ".*\\w.*");
    }

    /**
     * Matches the sample text against {@code regex} and prints the outcome.
     *
     * @param label text printed in front of the result
     * @param regex expression the entire sample must match
     */
    private static void report(String label, String regex) {
        boolean matched = SAMPLE.matches(regex);
        System.out.println(label + matched);
    }
}
量词
/**
 * Demonstrates regex quantifiers by matching several sample strings against
 * a fixed set of expressions. Every check is a whole-input match.
 */
public class RegexQuantifiers {

    // Quantifiers:
    //   *     - zero or more times
    //   +     - one or more times
    //   ?     - zero or one time
    //   {n}   - exactly n times
    //   {n,}  - at least n times
    //   {n,m} - between n and m times
    /**
     * Expressions under test, compiled once instead of on every
     * {@code String.matches} call inside the loop.
     */
    private static final java.util.regex.Pattern[] QUANTIFIER_PATTERNS =
            compileAll("a*", "a+", "a?", "a{2}", "a{2,}", "a{1,3}");

    /**
     * Prints, for each sample string, whether the entire string matches each
     * quantifier expression.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] texts = {"abc", "a", "ab", "abcde", "aabbc"};
        for (String text : texts) {
            System.out.println(text + ": ");
            for (java.util.regex.Pattern pattern : QUANTIFIER_PATTERNS) {
                // Matcher.matches() has the same whole-input semantics as
                // String.matches(), so the printed results are unchanged.
                boolean wholeMatch = pattern.matcher(text).matches();
                System.out.println(" 匹配" + pattern.pattern() + ": " + wholeMatch);
            }
        }
    }

    /** Compiles each expression in order and returns the resulting patterns. */
    private static java.util.regex.Pattern[] compileAll(String... regexes) {
        java.util.regex.Pattern[] compiled = new java.util.regex.Pattern[regexes.length];
        for (int i = 0; i < regexes.length; i++) {
            compiled[i] = java.util.regex.Pattern.compile(regexes[i]);
        }
        return compiled;
    }
}
边界匹配
/**
 * Demonstrates boundary matchers ({@code ^}, {@code $}, {@code \b}) on a
 * fixed sample string.
 */
public class RegexBoundaries {

    /**
     * Prints three labelled boolean results, one per boundary check.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Boundary matchers:
        //   ^   - beginning of a line
        //   $   - end of a line
        //   \\b - word boundary
        //   \\B - non-word boundary
        //
        // String.matches() already requires the entire input to match, so the
        // ^ and $ anchors below only make the intent explicit.
        String text = "Hello World";
        System.out.println("以H开头: " + text.matches("^Hello.*"));
        // Fixed: the original used "=>" here, which is not a Java operator
        // and prevented the class from compiling.
        System.out.println("以d结尾: " + text.matches(".*d$"));
        System.out.println("包含World: " + text.matches(".*\\bWorld\\b.*"));
    }
}
2. Pattern和Matcher
import java.util.regex.*;
/**
 * Walks through the basic {@link Pattern}/{@link Matcher} workflow: compile,
 * scan with {@code find()}, test a whole-input match, reset, and scan again.
 */
public class PatternMatcherExample {

    /**
     * Prints every run of digits in the sample text with its start/end
     * offsets, then the result of a whole-input match, then the first run of
     * digits again after resetting the matcher.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String input = "abc123def456ghi789";
        // Compile the expression and bind a matcher to the input in one step.
        final Matcher digits = Pattern.compile("\\d+").matcher(input);

        // Scan for successive matches until find() reports none left.
        boolean found = digits.find();
        while (found) {
            String span = digits.start() + "-" + digits.end();
            System.out.println("找到: " + digits.group());
            System.out.println("位置: " + span);
            found = digits.find();
        }

        // Whole-input match: true only if the entire text is digits.
        System.out.println("完全匹配: " + digits.matches());

        // Rewind the matcher so the next find() starts from the beginning.
        digits.reset();

        // Report only the first match this time.
        if (!digits.find()) {
            return;
        }
        System.out.println("第一个匹配: " + digits.group());
    }
}
分组捕获
import java.util.regex.*;
/**
 * Shows how to read captured groups from a match, first by number and then
 * by name.
 */
public class GroupCaptureExample {

    /**
     * Runs the numbered-group demo followed by the named-group demo.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        printNumberedGroups();
        printNamedGroups();
    }

    /**
     * Finds every yyyy-MM-dd shaped substring in the sample sentence and
     * prints the full match, groups 1 to 3, and the match offsets, followed
     * by an empty line.
     */
    private static void printNumberedGroups() {
        // Labels for capture groups 1, 2 and 3, in that order.
        String[] labels = {"年: ", "月: ", "日: "};
        Matcher dates = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})")
                .matcher("今天是2024-01-15,明天是2024-01-16");
        while (dates.find()) {
            System.out.println("完整匹配: " + dates.group());
            for (int i = 0; i < labels.length; i++) {
                System.out.println(labels[i] + dates.group(i + 1));
            }
            System.out.println("开始位置: " + dates.start());
            System.out.println("结束位置: " + dates.end());
            System.out.println();
        }
    }

    /**
     * Matches a single date with named groups (available since Java 7) and
     * prints the {@code year}, {@code month} and {@code day} groups.
     */
    private static void printNamedGroups() {
        Matcher date = Pattern.compile("(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})")
                .matcher("2024-01-15");
        if (!date.find()) {
            return;
        }
        System.out.println("年: " + date.group("year"));
        System.out.println("月: " + date.group("month"));
        System.out.println("日: " + date.group("day"));
    }
}
3. 常用正则表达式
import java.util.regex.*;
/**
 * A handful of commonly used format validators built on regular expressions.
 *
 * <p>All patterns are compiled once as constants rather than on every call,
 * and every validator returns {@code false} for {@code null} input instead of
 * throwing {@link NullPointerException}.
 */
public class CommonRegexPatterns {

    /** Local part of letters, digits and {@code + _ . -}, an {@code @}, then any non-empty remainder. */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("^[A-Za-z0-9+_.-]+@(.+)$");

    /** Eleven digits: a leading 1, a second digit in 3-9, then nine more digits. */
    private static final Pattern PHONE_PATTERN =
            Pattern.compile("^1[3-9]\\d{9}$");

    /** Seventeen digits followed by a digit or the letter X (either case). */
    private static final Pattern ID_CARD_PATTERN =
            Pattern.compile("^\\d{17}[\\dXx]$");

    /** An http, https or ftp scheme followed by a whitespace-free remainder. */
    private static final Pattern URL_PATTERN =
            Pattern.compile("^(https?|ftp)://[^\\s/$.?#].[^\\s]*$");

    /** Four dot-separated decimal octets, each in the range 0-255. */
    private static final Pattern IP_PATTERN = Pattern.compile(
            "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$");

    /**
     * Checks that the text has the loose shape of an email address.
     *
     * <p>This is a permissive check: the part after {@code @} only has to be
     * non-empty, so it is not validated as a domain name.
     *
     * @param email text to check; may be {@code null}
     * @return {@code true} if the whole text matches the email pattern
     */
    public static boolean isValidEmail(String email) {
        return matchesFully(EMAIL_PATTERN, email);
    }

    /**
     * Checks that the text has the shape of a mainland-China mobile number.
     *
     * @param phone text to check; may be {@code null}
     * @return {@code true} if the whole text matches the phone pattern
     */
    public static boolean isValidPhone(String phone) {
        return matchesFully(PHONE_PATTERN, phone);
    }

    /**
     * Checks that the text has the shape of an 18-character ID card number.
     * Only the format is checked; no checksum is computed.
     *
     * @param idCard text to check; may be {@code null}
     * @return {@code true} if the whole text matches the ID card pattern
     */
    public static boolean isValidIdCard(String idCard) {
        return matchesFully(ID_CARD_PATTERN, idCard);
    }

    /**
     * Checks that the text has the shape of an http, https or ftp URL.
     *
     * @param url text to check; may be {@code null}
     * @return {@code true} if the whole text matches the URL pattern
     */
    public static boolean isValidURL(String url) {
        return matchesFully(URL_PATTERN, url);
    }

    /**
     * Checks that the text is a dotted-decimal IPv4 address with every octet
     * in 0-255. Octets with leading zeros (for example {@code 01}) are accepted.
     *
     * @param ip text to check; may be {@code null}
     * @return {@code true} if the whole text matches the IP pattern
     */
    public static boolean isValidIP(String ip) {
        return matchesFully(IP_PATTERN, ip);
    }

    /** Returns whether {@code input} is non-null and matches {@code pattern} in its entirety. */
    private static boolean matchesFully(Pattern pattern, String input) {
        return input != null && pattern.matcher(input).matches();
    }

    /**
     * Prints the result of each validator for a few sample inputs.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("邮箱验证:");
        System.out.println(" test@example.com: " + isValidEmail("test@example.com"));
        System.out.println(" invalid-email: " + isValidEmail("invalid-email"));
        System.out.println("\n手机号验证:");
        System.out.println(" 13812345678: " + isValidPhone("13812345678"));
        System.out.println(" 12345678: " + isValidPhone("12345678"));
        System.out.println("\n身份证号验证:");
        System.out.println(" 110101199001011234: " + isValidIdCard("110101199001011234"));
        System.out.println("\nURL验证:");
        System.out.println(" https://www.example.com: " + isValidURL("https://www.example.com"));
        System.out.println(" invalid-url: " + isValidURL("invalid-url"));
        System.out.println("\nIP地址验证:");
        System.out.println(" 192.168.1.1: " + isValidIP("192.168.1.1"));
        System.out.println(" 999.999.999.999: " + isValidIP("999.999.999.999"));
    }
}
4. 字符串操作
import java.util.regex.*;
/**
 * Demonstrates common regex-driven string operations: finding, replacing,
 * splitting, extracting and validating.
 */
public class StringOperations {

    /**
     * Prints the results of each operation on fixed sample inputs.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String text = "Hello World 123 Java 456";

        // Find every run of digits.
        Pattern digitPattern = Pattern.compile("\\d+");
        Matcher matcher = digitPattern.matcher(text);
        System.out.println("所有数字:");
        while (matcher.find()) {
            System.out.println(" " + matcher.group());
        }

        // Replace every run of digits with a placeholder.
        String replaced = text.replaceAll("\\d+", "#");
        System.out.println("\n替换数字: " + replaced);

        // Split on runs of whitespace.
        String[] words = text.split("\\s+");
        System.out.println("\n分割单词:");
        for (String word : words) {
            System.out.println(" " + word);
        }

        // Extract email addresses embedded in free text.
        // The domain part is restricted to dot-separated labels of letters,
        // digits and hyphens. The previous "(.+)" was greedy and consumed the
        // rest of the line, so both addresses were reported as one match.
        String emailText = "联系我们: info@example.com 或 support@test.org";
        Pattern emailPattern = Pattern.compile(
                "[A-Za-z0-9+_.-]+@[A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)+");
        Matcher emailMatcher = emailPattern.matcher(emailText);
        System.out.println("\n提取邮箱:");
        while (emailMatcher.find()) {
            System.out.println(" " + emailMatcher.group());
        }

        // Validate a yyyy-MM-dd shaped string (format only, not calendar validity).
        String date = "2024-01-15";
        boolean isValidDate = date.matches("^\\d{4}-\\d{2}-\\d{2}$");
        System.out.println("\n日期格式有效: " + isValidDate);
    }
}
5. 实际应用示例
日志解析器
import java.util.regex.*;
import java.util.*;
/**
 * Splits log lines of the form {@code yyyy-MM-dd HH:mm:ss [LEVEL] message}
 * into their timestamp, level and message parts.
 */
public class LogParser {

    /** Group 1: date and time; group 2: bracketed level word; group 3: rest of the line. */
    private static final Pattern LOG_PATTERN = Pattern.compile(
            "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) \\[(\\w+)\\] (.*)"
    );

    /** Map keys for capture groups 1, 2 and 3, in that order. */
    private static final String[] FIELD_NAMES = {"timestamp", "level", "message"};

    /**
     * Parses a single log line.
     *
     * @param logLine the line to parse
     * @return a new map holding {@code timestamp}, {@code level} and
     *         {@code message} when the whole line matches the expected
     *         layout; an empty map otherwise
     */
    public static Map<String, String> parseLog(String logLine) {
        Map<String, String> fields = new HashMap<>();
        Matcher line = LOG_PATTERN.matcher(logLine);
        if (!line.matches()) {
            return fields;
        }
        for (int group = 1; group <= FIELD_NAMES.length; group++) {
            fields.put(FIELD_NAMES[group - 1], line.group(group));
        }
        return fields;
    }

    /**
     * Parses three sample log lines and prints their parts.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] samples = {
            "2024-01-15 10:30:00 [INFO] Application started",
            "2024-01-15 10:30:05 [ERROR] Database connection failed",
            "2024-01-15 10:30:10 [DEBUG] Processing request"
        };
        for (int i = 0; i < samples.length; i++) {
            String raw = samples[i];
            Map<String, String> fields = parseLog(raw);
            System.out.println("日志: " + raw);
            System.out.println(" 时间: " + fields.get("timestamp"));
            System.out.println(" 级别: " + fields.get("level"));
            System.out.println(" 消息: " + fields.get("message"));
            System.out.println();
        }
    }
}
模板引擎
import java.util.regex.*;
import java.util.*;
/**
 * A minimal template engine that substitutes {@code {key}} placeholders with
 * values from a map.
 */
public class SimpleTemplateEngine {

    /** A placeholder: one or more word characters wrapped in braces; group 1 is the key. */
    private static final Pattern TEMPLATE_PATTERN = Pattern.compile("\\{(\\w+)\\}");

    /**
     * Replaces each {@code {key}} placeholder in the template with the value
     * mapped to {@code key}.
     *
     * <p>Placeholders whose key is absent from the map, or mapped to
     * {@code null}, are left in the output unchanged. Values are inserted
     * literally: {@code $} and {@code \} in a value have no special meaning.
     *
     * @param template text containing zero or more placeholders
     * @param data     placeholder values keyed by placeholder name
     * @return the template with all resolvable placeholders substituted
     */
    public static String render(String template, Map<String, String> data) {
        Matcher matcher = TEMPLATE_PATTERN.matcher(template);
        // StringBuffer is the only buffer type appendReplacement accepts
        // before Java 9.
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            String key = matcher.group(1);
            String value = data.get(key);
            if (value == null) {
                // Missing key or explicit null value: keep the placeholder.
                // Previously a null value reached quoteReplacement and threw
                // NullPointerException.
                value = matcher.group();
            }
            // quoteReplacement stops '$' and '\' in the value from being
            // read as group references or escapes.
            matcher.appendReplacement(result, Matcher.quoteReplacement(value));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Renders a sample template and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String template = "Hello, {name}! Welcome to {city}. Today is {date}.";
        Map<String, String> data = new HashMap<>();
        data.put("name", "Alice");
        data.put("city", "Beijing");
        data.put("date", "2024-01-15");
        String result = render(template, data);
        System.out.println("模板渲染结果: " + result);
    }
}
6. 最佳实践
- 预编译Pattern:重复使用的正则表达式应该预编译
- 注意转义:Java没有原始字符串字面量,正则中的反斜杠在字符串里必须写成两个(例如用"\\d"表示\d);需要按字面匹配的文本可用Pattern.quote()处理
- 测试正则表达式:使用在线工具测试正则表达式
- 注意性能:避免使用过于复杂的正则表达式
- 文档化模式:清楚地文档化正则表达式的用途
总结
正则表达式是Java中强大的字符串处理工具。掌握正则表达式的语法和使用方法,可以高效地处理各种字符串操作任务。在实际编程中,要根据需求选择合适的正则表达式,并注意性能和可读性。