Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/regex/18.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java正则表达式中发现的错误_Java_Regex - Fatal编程技术网

Java正则表达式中发现的错误

Java正则表达式中发现的错误,java,regex,Java,Regex,我们似乎发现了java正则表达式的一个bug 我们正在尝试匹配不同的月-年模式,这些模式在一行中出现两次,或者只是在一行中出现两年的模式 但java似乎将今年的部分时间与空间分隔符混淆了。我已经向我的教授展示了这个问题,但我们无法解决它 具体来说,我们希望匹配“2013年1月-2014年1月”以及“2013年-2014年”。发生的情况是,在2013年,我们得到0以匹配月份和年份之间的分隔符,即使0不在分隔符模式中。所以我们最终得到了与2/13相同的结果 这是代码 import java.

我们似乎发现了java正则表达式的一个bug

我们正在尝试匹配不同的月-年模式,这些模式在一行中出现两次,或者只是在一行中出现两年的模式

但java似乎将今年的部分时间与空间分隔符混淆了。我已经向我的教授展示了这个问题,但我们无法解决它

具体来说,我们希望匹配“2013年1月-2014年1月”以及“2013年-2014年”。发生的情况是,在2013年,我们得到0以匹配月份和年份之间的分隔符,即使0不在分隔符模式中。所以我们最终得到了与2/13相同的结果 这是代码

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.GregorianCalendar;
    import java.util.HashMap;
    import java.util.concurrent.CountDownLatch;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    import org.w3c.dom.css.Counter;

    public class DatePattens {
        //private ArrayList<MatchedDateObject> arryLstOfDates = new ArrayList<MatchedDateObject>();
        private ArrayList<String> matchedString = new ArrayList<String>();
        private HashMap<String,Integer> map ;


        private String monthPattern = "((0[1-9]|1[012]|[1-9])|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sept|Sep|Oct|Nov|Dec)[a-z]*)";  // 3 groups
        private String monthAndYearSeperator="\\s*(\\s*|,|;|~|--|-|.|\\/)\\s*";      // 1 group
        private String twoOrFourDigitYearPattern="(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\\s*";         // 1 group  
        private String presentPattern = "(Current|Present|Now|Currently|Presently|Till Date|Todate|Today)";
        private String twoDatesSeperator = "\\s*(\\s*|-|~|--|,|to|til|till|until)\\s*";    // 1 group
        private String twoOrFourDigitOrPresentYearPattern = presentPattern + "|" + twoOrFourDigitYearPattern;  // 2 groups
        private String secondIdenticalMonthPattern="(([1-9]|0[1-9]|1[012])|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sept|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December))";  // 3 groups
        private String dateToDateCompletePatternOne=
        monthPattern + monthAndYearSeperator + twoOrFourDigitYearPattern + twoDatesSeperator +  
        "((" + secondIdenticalMonthPattern +
        monthAndYearSeperator +
        twoOrFourDigitYearPattern +")|" +
        presentPattern +")" 
    ;               
        private Pattern patternAry = null;
        private Matcher matcher = null;
        public DatePattens() {
            map = new HashMap<String,Integer>();
            patternAry = Pattern.compile(dateToDateCompletePatternOne, Pattern.CASE_INSENSITIVE);
            matcher = patternAry.matcher("");   
        }
        //
        // extract the two dates to look for duration afterwards
        // 1. check if the a year pattern exists
        //    1.1 if not skip to else at the end and return false
        // 2. if yes get the rest of the line past year 1
        // 3. check for year 2 or CURRENT/Present/...

        public boolean matchTwoYearPattern(String inputLine){
            String fname="matchTwoYearPattern";
            Pattern firstYearPattern = Pattern
                    .compile(twoOrFourDigitYearPattern,Pattern.CASE_INSENSITIVE);
            Matcher matcher1 = firstYearPattern.matcher("");


            Pattern secondPattern = Pattern.compile(twoOrFourDigitOrPresentYearPattern,
                    Pattern.CASE_INSENSITIVE);
            Matcher matcher2 = secondPattern.matcher("");
            //long startTime = System.currentTimeMillis();

            matcher1.reset(inputLine);
            if (matcher1.find()) {  // 1
                String remaingString = inputLine.substring(matcher1.end(),
                        inputLine.length());   // 2
                matcher2.reset(remaingString);
                if (matcher2.find()) {  // 3
                    return true;
                }

            }       
            return false;   // 1.1 and end 

        }
        public String matchAllDatePatterns(String line, int lineNum){
            String fname = "matchAllPatterns:: ";
             if (matchTwoYearPattern(line) == false) {  // check if two years (or year and CURRENT/today...) present, if not return false
                 return("false:" + line);
             }
             else {
             }
            String matched = "";
            int i = 0;
                matcher.reset(line);
                if (matcher.find()) {// here we are matching the pattern dateToDateCompletePatternOne
                 System.out.println(fname + "line: " +line);
                    System.out.println("group count "+matcher.groupCount());                
                    System.out.println("group1 " +matcher.group(1));
                    System.out.println("group2 " +matcher.group(2));
                    System.out.println("group3 " +matcher.group(3));
                    System.out.println("group4 " +matcher.group(4));//so for 2013 - Jan 2013 input
                    //here matcher.group(4) is matching to 0 which we dont have in the pattern
                    System.out.println("group5 " +matcher.group(5));
                    System.out.println("group6 " +matcher.group(6));
                    System.out.println("group7 " +matcher.group(7));
                    System.out.println("group8 " +matcher.group(8));
                    System.out.println("group9 " +matcher.group(9));
                    System.out.println("group10 " +matcher.group(10));
                    System.out.println("group11 " +matcher.group(11));
                    System.out.println("group12 " +matcher.group(12));
                    System.out.println("group13 " +matcher.group(13));
                    System.out.println("group14 " + matcher.group(14));        
            }

                return matched;

        }
        public static void main(String args[]){
            DatePattens dp= new DatePattens();
            String fileName = "Resume.txt";

            try {
                ReadFile file = new ReadFile(fileName);
                String[] aryLines = file.openFile();
                int i=0;
                 long startTime =System.currentTimeMillis();


                    for (String input : aryLines) {
                        String output = dp.matchAllDatePatterns(input, i);
                        i++;
                    }

                long endTime =System.currentTimeMillis();
                System.out.println("Time required for this operation :" + ((endTime-startTime)*0.001));

            } catch (IOException e) {
                System.out.println(e);
            }

        }

    }
import java.io.IOException;
导入java.util.ArrayList;
导入java.util.GregorianCalendar;
导入java.util.HashMap;
导入java.util.concurrent.CountDownLatch;
导入java.util.regex.Matcher;
导入java.util.regex.Pattern;
导入org.w3c.dom.css.Counter;
公共类模式{
//private ArrayList arryLstOfDates=new ArrayList();
private ArrayList matchedString=new ArrayList();
私有哈希映射;
私有字符串monthPattern=“(0[1-9]| 1[012]|[1-9])|(一月|二月|三月|四月|五月|六月|七月|八月|九月|九月|十月|十一月|十二月)[a-z]*”//3组
私有字符串MontHandyearSeparator=“\\s*(\\s*|,| | ~-|-|-|-|/)\\s*”;//1组
私有字符串twoOrFourDigitYearPattern=“(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\\s*”;//1组
私有字符串presentPattern=“(当前|当前|现在|当前|到日期|今天|)”;
私有字符串TwoDatesOperator=“\\s*(\\s*|-| ~ |-|-|-|-|-|,|到|直到)\\s*”;//1组
私有字符串TwoorFourDigitorPresentyPattern=presentPattern+“|”+twoOrFourDigitYearPattern;//2组
私有字符串secondIdenticalMonthPattern=“([1-9]| 0[1-9]| 1[012])(一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月1243)//
私有字符串DateToDateCompletePatterOne=
monthPattern+MontHandyearSeparator+Two或FourDigitYearPattern+TwoDatesOperator+
“((“+secondIdenticalMonthPattern+
蒙特汉德耶尔分离器+
twoOrFourDigitYearPattern+”)|“+
presentPattern+“””
;               
私有模式patternAry=null;
私有匹配器匹配器=null;
公共模式(){
map=新的HashMap();
patternAry=Pattern.compile(dateToDateCompleteTatterOne,Pattern.Pattern不区分大小写);
matcher=patternAry.matcher(“”);
}
//
//提取这两个日期以查找之后的持续时间
//1.检查a年模式是否存在
//1.1如果没有,则跳到末尾的else并返回false
//2.如果是,则获取过去一年的剩余线路1
//3.检查第2年或当前/当前/。。。
公共布尔匹配模式(字符串输入行){
字符串fname=“matchTwoYearPattern”;
模式第一年模式=模式
.compile(twoOrFourDigitYearPattern,Pattern.CASE_不区分大小写);
Matcher matcher1=firstYearPattern.Matcher(“”);
Pattern secondPattern=Pattern.compile(两个或四个DigitorPresentyPattern,
模式(不区分大小写);
Matcher matcher2=secondPattern.Matcher(“”);
//long startTime=System.currentTimeMillis();
匹配器1.复位(输入线);
if(matcher1.find()){//1
String remainString=inputLine.substring(matcher1.end(),
inputLine.length());//2
matcher2.重置(剩余字符串);
if(matcher2.find()){//3
返回true;
}
}       
返回false;//1.1并结束
}
公共字符串matchAllDatePatterns(字符串行,int-lineNum){
String fname=“matchAllPatterns::”;
if(matchTwoYearPattern(line)==false){//检查是否存在两年(或年和当前/今天…),如果不存在,则返回false
返回(“false:+行);
}
否则{
}
字符串匹配=”;
int i=0;
匹配器重置(行);
如果(matcher.find()){//这里我们将匹配模式DateToDateCompleteTatterOne
System.out.println(fname+“行:”+行);
System.out.println(“组计数”+matcher.groupCount());
System.out.println(“group1”+matcher.group(1));
System.out.println(“group2”+matcher.group(2));
System.out.println(“group3”+matcher.group(3));
System.out.println(“group4”+matcher.group(4));//2013-2013年1月的输入
//这里matcher.group(4)与模式中没有的0相匹配
System.out.println(“group5”+matcher.group(5));
System.out.println(“组6”+匹配器组(6));
System.out.println(“组7”+匹配器组(7));
System.out.println(“group8”+matcher.group(8));
System.out.println(“group9”+matcher.group(9));
System.out.println(“group10”+matcher.group(10));
System.out.println(“group11”+matcher.group(11));
System.out.println(“group12”+matcher.group(12));
System.out.println(“group13”+matcher.group(13));
System.out.println(“group14”+matcher.group(14));
}
返回匹配;
}
公共静态void main(字符串参数[]){
DatePattens dp=新的日期模式();
字符串fileName=“Resume.txt”;
试一试{
读取文件f
private String monthAndYearSeperator="\\s*(\\s*|,|;|~|--|-|.|\\/)\\s*";
private String monthAndYearSeperator="\\s*(\\s*|,|;|~|--|-|\\.|\\/)\\s*";
private String monthAndYearSeperator="\\s*(\\s*|,|;|~|--|-|.|\\/)\\s*";
private String twoDatesSeperator = "\\s*(\\s*|-|~|--|,|to|til|till|until)\\s*";