Java正则表达式中发现的错误
我们似乎发现了 Java 正则表达式的一个 bug。我们正在尝试匹配在同一行中出现两次的各种“月-年”模式,或者同一行中只出现两个年份的模式。但 Java 似乎把年份中的一部分数字当成了月份与年份之间的分隔符。我已经把这个问题拿给教授看过,但我们仍然无法解决。具体来说,我们希望匹配“2013年1月-2014年1月”以及“2013年-2014年”。实际发生的情况是:对于 2013,其中的 0 被匹配成了月份和年份之间的分隔符,尽管分隔符模式中并没有 0,所以最终得到的结果与 2/13 相同。
标签:java, regex
代码如下:
import java.io.IOException;
import java.util.ArrayList;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.concurrent.CountDownLatch;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.w3c.dom.css.Counter;
/**
 * Scans lines of text for "date to date" ranges such as {@code Jan 2013 - Jan 2014} or
 * {@code 03/2012 to Present}.
 *
 * <p>The complete range pattern is assembled from smaller regex fragments and exposes
 * 14 capturing groups:
 * <ul>
 *   <li>1-3: first month (whole, numeric form, named form)</li>
 *   <li>4: separator between first month and first year</li>
 *   <li>5: first year</li>
 *   <li>6: separator between the two dates</li>
 *   <li>7-8: whole second date / second "month + year" form</li>
 *   <li>9-11: second month (whole, numeric form, named form)</li>
 *   <li>12: separator between second month and second year</li>
 *   <li>13: second year</li>
 *   <li>14: "present" keyword used in place of a second date</li>
 * </ul>
 *
 * <p>NOTE: both separator fragments accept the empty string (their {@code \s*} alternative),
 * so the digits of a bare number can still be split into a month part and a year part.
 * Callers that need strict matching should post-validate the captured groups.
 */
public class DatePattens {

    // Kept from the original class; neither field is read by the code in this class.
    private ArrayList<String> matchedString = new ArrayList<String>();
    private HashMap<String, Integer> map;

    /** Numeric month (01-12 or 1-9) or a month name/abbreviation followed by any letters; 3 groups. */
    private final String monthPattern =
            "((0[1-9]|1[012]|[1-9])|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sept|Sep|Oct|Nov|Dec)[a-z]*)";

    /**
     * Separator between a month and its year; 1 group.
     *
     * <p>The period is escaped ({@code \\.}). Unescaped, {@code .} is the regex "any character"
     * metacharacter, which let a digit such as the {@code 0} in {@code 2013} be consumed as the
     * separator.
     */
    private final String monthAndYearSeperator = "\\s*(\\s*|,|;|~|--|-|\\.|\\/)\\s*";

    /** Four-digit year (19xx or 2000-9999) or any two-digit year, plus trailing whitespace; 1 group. */
    private final String twoOrFourDigitYearPattern = "(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\\s*";

    /** Keywords that stand for "up to now" in place of a second date; 1 group. */
    private final String presentPattern =
            "(Current|Present|Now|Currently|Presently|Till Date|Todate|Today)";

    /** Separator between the first and the second date; 1 group. */
    private final String twoDatesSeperator = "\\s*(\\s*|-|~|--|,|to|til|till|until)\\s*";

    /** Either a "present" keyword or a year; 2 groups. */
    private final String twoOrFourDigitOrPresentYearPattern =
            presentPattern + "|" + twoOrFourDigitYearPattern;

    /** Month form used for the second date (numeric, abbreviated or full name); 3 groups. */
    private final String secondIdenticalMonthPattern =
            "(([1-9]|0[1-9]|1[012])|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sept|Sep|Oct|Nov|Dec"
            + "|January|February|March|April|May|June|July|August|September|October|November|December))";

    /** Full "month year <sep> (month year | present)" expression. */
    private final String dateToDateCompletePatternOne =
            monthPattern + monthAndYearSeperator + twoOrFourDigitYearPattern + twoDatesSeperator
            + "((" + secondIdenticalMonthPattern
            + monthAndYearSeperator
            + twoOrFourDigitYearPattern + ")|"
            + presentPattern + ")";

    // Compiled once per instance instead of once per call.
    private final Pattern patternAry =
            Pattern.compile(dateToDateCompletePatternOne, Pattern.CASE_INSENSITIVE);
    private final Pattern firstYearPattern =
            Pattern.compile(twoOrFourDigitYearPattern, Pattern.CASE_INSENSITIVE);
    private final Pattern secondYearPattern =
            Pattern.compile(twoOrFourDigitOrPresentYearPattern, Pattern.CASE_INSENSITIVE);

    /** Creates a matcher helper with an empty map. */
    public DatePattens() {
        map = new HashMap<String, Integer>();
    }

    /**
     * Cheap pre-check: reports whether the line contains a year followed, anywhere later in the
     * line, by a second year or a "present" keyword.
     *
     * @param inputLine the line to inspect; {@code null} is treated as "no match"
     * @return {@code true} if a first year and a subsequent year/"present" token are both found
     */
    public boolean matchTwoYearPattern(String inputLine) {
        if (inputLine == null) {
            return false;
        }
        Matcher firstYear = firstYearPattern.matcher(inputLine);
        if (!firstYear.find()) {
            return false;
        }
        // Only the text after the first year is searched for the second year / keyword.
        String remainder = inputLine.substring(firstYear.end());
        return secondYearPattern.matcher(remainder).find();
    }

    /**
     * Runs the complete date-range pattern against a line and prints every capturing group of
     * the first match to standard output.
     *
     * @param line the text line to examine
     * @param lineNum index of the line in its source; currently not used
     * @return {@code "false:" + line} when the two-year pre-check fails, otherwise an empty
     *         string (the matched text is only printed, not returned)
     */
    public String matchAllDatePatterns(String line, int lineNum) {
        String fname = "matchAllPatterns:: ";
        if (!matchTwoYearPattern(line)) {
            return "false:" + line;
        }

        String matched = "";
        Matcher matcher = patternAry.matcher(line);
        if (matcher.find()) {
            System.out.println(fname + "line: " + line);
            System.out.println("group count " + matcher.groupCount());
            // Groups 1..14; see the class comment for what each group captures.
            for (int g = 1; g <= matcher.groupCount(); g++) {
                System.out.println("group" + g + " " + matcher.group(g));
            }
        }
        return matched;
    }

    /**
     * Reads {@code Resume.txt} through the project's {@code ReadFile} helper, runs every line
     * through {@link #matchAllDatePatterns(String, int)} and prints the elapsed time in seconds.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        DatePattens dp = new DatePattens();
        String fileName = "Resume.txt";
        try {
            ReadFile file = new ReadFile(fileName);
            String[] aryLines = file.openFile();
            int i = 0;
            long startTime = System.currentTimeMillis();
            for (String input : aryLines) {
                dp.matchAllDatePatterns(input, i);
                i++;
            }
            long endTime = System.currentTimeMillis();
            System.out.println("Time required for this operation :" + ((endTime - startTime) * 0.001));
        } catch (IOException e) {
            System.out.println(e);
        }
    }
}
import java.io.IOException;
导入java.util.ArrayList;
导入java.util.GregorianCalendar;
导入java.util.HashMap;
导入java.util.concurrent.CountDownLatch;
导入java.util.regex.Matcher;
导入java.util.regex.Pattern;
导入org.w3c.dom.css.Counter;
公共类模式{
//private ArrayList arryLstOfDates=new ArrayList();
private ArrayList matchedString=new ArrayList();
私有哈希映射;
私有字符串monthPattern=“(0[1-9]| 1[012]|[1-9])|(一月|二月|三月|四月|五月|六月|七月|八月|九月|九月|十月|十一月|十二月)[a-z]*”//3组
私有字符串MontHandyearSeparator=“\\s*(\\s*|,| | ~-|-|-|-|/)\\s*”;//1组
私有字符串twoOrFourDigitYearPattern=“(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\\s*”;//1组
私有字符串presentPattern=“(当前|当前|现在|当前|到日期|今天|)”;
私有字符串TwoDatesOperator=“\\s*(\\s*|-| ~ |-|-|-|-|-|,|到|直到)\\s*”;//1组
私有字符串TwoorFourDigitorPresentyPattern=presentPattern+“|”+twoOrFourDigitYearPattern;//2组
私有字符串secondIdenticalMonthPattern=“([1-9]| 0[1-9]| 1[012])(一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月1243)//
私有字符串DateToDateCompletePatterOne=
monthPattern+MontHandyearSeparator+Two或FourDigitYearPattern+TwoDatesOperator+
“((“+secondIdenticalMonthPattern+
蒙特汉德耶尔分离器+
twoOrFourDigitYearPattern+”)|“+
presentPattern+“””
;
私有模式patternAry=null;
私有匹配器匹配器=null;
公共模式(){
map=新的HashMap();
patternAry=Pattern.compile(dateToDateCompleteTatterOne,Pattern.Pattern不区分大小写);
matcher=patternAry.matcher(“”);
}
//
//提取这两个日期以查找之后的持续时间
//1.检查a年模式是否存在
//1.1如果没有,则跳到末尾的else并返回false
//2.如果是,则获取过去一年的剩余线路1
//3.检查第2年或当前/当前/。。。
公共布尔匹配模式(字符串输入行){
字符串fname=“matchTwoYearPattern”;
模式第一年模式=模式
.compile(twoOrFourDigitYearPattern,Pattern.CASE_不区分大小写);
Matcher matcher1=firstYearPattern.Matcher(“”);
Pattern secondPattern=Pattern.compile(两个或四个DigitorPresentyPattern,
模式(不区分大小写);
Matcher matcher2=secondPattern.Matcher(“”);
//long startTime=System.currentTimeMillis();
匹配器1.复位(输入线);
if(matcher1.find()){//1
String remainString=inputLine.substring(matcher1.end(),
inputLine.length());//2
matcher2.重置(剩余字符串);
if(matcher2.find()){//3
返回true;
}
}
返回false;//1.1并结束
}
公共字符串matchAllDatePatterns(字符串行,int-lineNum){
String fname=“matchAllPatterns::”;
if(matchTwoYearPattern(line)==false){//检查是否存在两年(或年和当前/今天…),如果不存在,则返回false
返回(“false:+行);
}
否则{
}
字符串匹配=”;
int i=0;
匹配器重置(行);
如果(matcher.find()){//这里我们将匹配模式DateToDateCompleteTatterOne
System.out.println(fname+“行:”+行);
System.out.println(“组计数”+matcher.groupCount());
System.out.println(“group1”+matcher.group(1));
System.out.println(“group2”+matcher.group(2));
System.out.println(“group3”+matcher.group(3));
System.out.println(“group4”+matcher.group(4));//2013-2013年1月的输入
//这里matcher.group(4)与模式中没有的0相匹配
System.out.println(“group5”+matcher.group(5));
System.out.println(“组6”+匹配器组(6));
System.out.println(“组7”+匹配器组(7));
System.out.println(“group8”+matcher.group(8));
System.out.println(“group9”+matcher.group(9));
System.out.println(“group10”+matcher.group(10));
System.out.println(“group11”+matcher.group(11));
System.out.println(“group12”+matcher.group(12));
System.out.println(“group13”+matcher.group(13));
System.out.println(“group14”+matcher.group(14));
}
返回匹配;
}
公共静态void main(字符串参数[]){
DatePattens dp=新的日期模式();
字符串fileName=“Resume.txt”;
试一试{
读取文件f
// Original month/year separator: the unescaped '.' alternative is the regex
// "any character" metacharacter, so it can match a digit such as the '0' in "2013".
private String monthAndYearSeperator="\\s*(\\s*|,|;|~|--|-|.|\\/)\\s*";
// Corrected month/year separator: '\\.' matches only a literal period.
private String monthAndYearSeperator="\\s*(\\s*|,|;|~|--|-|\\.|\\/)\\s*";
// NOTE(review): identical to the original (unescaped '.') declaration above;
// presumably quoted again for reference -- confirm against the source page.
private String monthAndYearSeperator="\\s*(\\s*|,|;|~|--|-|.|\\/)\\s*";
// Separator between the two dates: optional whitespace around an empty/whitespace run,
// '-', '~', '--', ',', or one of the words to/til/till/until.
private String twoDatesSeperator = "\\s*(\\s*|-|~|--|,|to|til|till|until)\\s*";