Java 正则表达式模式元数据

Java 正则表达式模式元数据,java,regex,Java,Regex,我想检查一个正则表达式,以确定它包含的匹配组。下面是我想要的API类型的一个示例: String pattern = "^My name is \"([^\"]*)\" and I am (\d*) years old$" Pattern p = Pattern.compile(pattern) Group g1 = p.getGroups(0); // Representing the name group g1.getStartPosition(); // should yeild pos

我想检查一个正则表达式,以确定它包含的匹配组。下面是我想要的API类型的一个示例:

String pattern = "^My name is \"([^\"]*)\" and I am (\d*) years old$"
Pattern p = Pattern.compile(pattern)

Group g1 = p.getGroups(0); // Representing the name group
g1.getStartPosition(); // should yield position in regex string, e.g. 14
g1.getEndPosition();   // 21

Group g2 = p.getGroups(1); // Representing the age group
g2.getStartPosition(); // 34
g2.getEndPosition();   // 39
Java 标准库中的
java.util.regex.Pattern
没有提供这一功能,但我想知道是否有现成的开源库可以让我以这种方式检查正则表达式。


我不想自己动手,用
java.lang.String
的 API 去拆解正则表达式字符串,因为那样会特别麻烦。

这不是专业级的API,但我鼓励您试用这个类。我把它当作练习写的,它有几个类似于
Matcher
的方法,比如:
group(int group)
start(int group)
end(int group)
groupCount()
。它用起来很简单。

import java.util.ArrayList;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reports where each capturing group is written inside a regular-expression string.
 *
 * <p>The accessors mirror the names used by {@code java.util.regex.Matcher}, but every offset
 * refers to a position in the regular expression itself, not in an input being matched.
 * Group 0 denotes the whole expression; groups 1..n are the capturing groups, numbered by the
 * position of their opening parenthesis.
 *
 * <p>The expression is scanned textually and is never compiled, so syntactically invalid
 * expressions are accepted; unbalanced parentheses are simply not reported as groups.
 * Escaped parentheses, parentheses inside character classes and inside {@code \Q...\E}
 * quotes are treated as literals. Comments enabled by the {@code (?x)} flag are not
 * recognised.
 */
public class Metamatcher {
    private final String pattern;
    /** Start offset of each group in {@link #pattern}; index 0 is the whole expression. */
    private final int[] groupStarts;
    /** End offset (exclusive) of each group in {@link #pattern}; index 0 is the whole expression. */
    private final int[] groupEnds;

    /**
     * @param pattern regular expression to inspect
     * @throws NullPointerException if {@code pattern} is null
     */
    Metamatcher(String pattern){
        this.pattern = pattern;
        TreeMap<Integer,Integer> groups = getGroups();

        // Flatten the sorted map once so that start()/end() are plain array lookups.
        groupStarts = new int[groups.size() + 1];
        groupEnds = new int[groups.size() + 1];
        groupEnds[0] = pattern.length();
        int number = 1;
        for(Integer open : groups.keySet()){
            groupStarts[number] = open;
            groupEnds[number] = groups.get(open);
            number++;
        }
    }

    /**
     * @param group ordinal number of group, 0 for the whole expression
     * @return index in the expression of the group's opening parenthesis (0 for group 0)
     * @throws IndexOutOfBoundsException if there is no group with the given number
     */
    public int start(int group){
        checkGroup(group);
        return groupStarts[group];
    }

    /**
     * @param group ordinal number of group, 0 for the whole expression
     * @return index in the expression just past the group's closing parenthesis
     *         (the expression length for group 0)
     * @throws IndexOutOfBoundsException if there is no group with the given number
     */
    public int end(int group){
        checkGroup(group);
        return groupEnds[group];
    }

    /**
     * @param group ordinal number of group, 0 for the whole expression
     * @return the fragment of the regular expression that defines the given group,
     *         including its enclosing parentheses
     * @throws IndexOutOfBoundsException if there is no group with the given number
     */
    public String group(int group){
        return pattern.substring(start(group), end(group));
    }

    /**
     * @return number of capturing groups within the regular expression (group 0 is not counted)
     */
    public int groupCount(){
        return groupStarts.length - 1;
    }

    /**
     * @return a multi-line listing with the group count followed by one line per group
     *         (including group 0) showing its offsets and its text
     */
    @Override
    public String toString(){
        StringBuilder result = new StringBuilder();
        result.append("Groups count: ")
                .append(groupCount())
                .append("\n");
        for(int i = 0; i <= groupCount(); i++){
            result.append("group(")
                    .append(i).append(") ")
                    .append(start(i))
                    .append("-")
                    .append(end(i))
                    .append("\t")
                    .append(group(i))
                    .append("\n");
        }
        return result.toString();
    }

    /** Rejects group numbers outside 0..groupCount(). */
    private void checkGroup(int group){
        if(group < 0 || group > groupCount()){
            throw new IndexOutOfBoundsException("No group " + group);
        }
    }

    /**
     * Scans the expression once, pairing every opening parenthesis with its closing one.
     *
     * @return map from the index of each capturing group's opening parenthesis to the index
     *         just past its closing parenthesis; iteration order equals group numbering
     */
    private TreeMap<Integer,Integer> getGroups(){
        TreeMap<Integer,Integer> closed = new TreeMap<Integer,Integer>();
        int length = pattern.length();
        // Stack of currently open parentheses: position and whether the group captures.
        int[] openAt = new int[length];
        boolean[] openCaptures = new boolean[length];
        int open = 0;
        int classDepth = 0;

        for(int i = 0; i < length; i++){
            char c = pattern.charAt(i);
            if(c == '\\'){
                if(pattern.startsWith("\\Q", i)){
                    // Everything up to the matching \E is literal text.
                    int quoteEnd = pattern.indexOf("\\E", i + 2);
                    if(quoteEnd < 0){
                        break; // quoted until the end of the expression
                    }
                    i = quoteEnd + 1;
                } else {
                    i++; // the escaped character is a literal, e.g. \( or \[
                }
            } else if(classDepth > 0){
                // Inside [...] parentheses are literals; classes may nest, e.g. [a-z&&[^b]].
                if(c == '['){
                    classDepth++;
                } else if(c == ']'){
                    classDepth--;
                }
            } else if(c == '['){
                classDepth = 1;
            } else if(c == '('){
                openAt[open] = i;
                openCaptures[open] = isCapturingGroup(i);
                open++;
            } else if(c == ')' && open > 0){
                open--;
                if(openCaptures[open]){
                    closed.put(openAt[open], i + 1);
                }
            }
        }

        return closed;
    }

    /**
     * @param open index of an unescaped opening parenthesis in the expression
     * @return true if the parenthesis starts a capturing group: either a plain {@code (...)}
     *         or a named {@code (?<name>...)}; false for {@code (?:}, look-arounds, atomic
     *         groups and inline flags
     */
    private boolean isCapturingGroup(int open){
        int next = open + 1;
        if(next >= pattern.length() || pattern.charAt(next) != '?'){
            return true;
        }
        if(!pattern.startsWith("?<", next) || next + 2 >= pattern.length()){
            return false;
        }
        // "(?<=" and "(?<!" are look-behinds; any other "(?<" introduces a named group.
        char afterAngle = pattern.charAt(next + 2);
        return afterAngle != '=' && afterAngle != '!';
    }
}
import java.util.ArrayList;
导入java.util.TreeMap;
导入java.util.regex.Matcher;
导入java.util.regex.Pattern;
公共类元匹配器{
私有字符串模式;
私有树映射组;
元匹配器(字符串模式){
这个模式=模式;
groupsIndices=getGroups();
}
/**
*@param组组的序号
*@return模式片段的起始索引,包含组捕获
*/
公共整数启动(整数组){
ArrayList索引=新的ArrayList(groupsIndices.keySet());
指数。加上(0,0);
收益率指数.get(组);
}
/**
*@param组组的序号
*@return模式片段的结束索引,包含组捕获
*/
公共int端(int组){
ArrayList索引=新的ArrayList(groupsIndices.values());
index.add(0,pattern.length());
收益率指数.get(组);
}
/**
*@param组组的序号
*@return String对象,包含捕获给定组的正则表达式片段
*/
公共字符串组(int组){
返回模式。子字符串(开始(组),结束(组));
}
/**
*@返回给定正则表达式中捕获组的数目
*/
public int groupCount(){
返回groupsIndices.size();
}
公共字符串toString(){
StringBuilder结果=新建StringBuilder();
result.append(“组计数:”)
.append(groupCount())
.append(“\n”);
对于(int i=0;i

我用各种输入测试过它,并将输出与 regex101 等工具的结果进行了比较,对我来说它运行正常。

这不是专业级的API,但我鼓励你试用这个类。我把它当作练习写的,它有几个类似于
Matcher 的方法,比如:
group(int group)
start(int group)
end(int group)
groupCount()
。使用起来相当简单。

import java.util.ArrayList;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reports where each capturing group is written inside a regular-expression string.
 *
 * <p>The accessors mirror the names used by {@code java.util.regex.Matcher}, but every offset
 * refers to a position in the regular expression itself, not in an input being matched.
 * Group 0 denotes the whole expression; groups 1..n are the capturing groups, numbered by the
 * position of their opening parenthesis.
 *
 * <p>The expression is scanned textually and is never compiled, so syntactically invalid
 * expressions are accepted; unbalanced parentheses are simply not reported as groups.
 * Escaped parentheses, parentheses inside character classes and inside {@code \Q...\E}
 * quotes are treated as literals. Comments enabled by the {@code (?x)} flag are not
 * recognised.
 */
public class Metamatcher {
    private final String pattern;
    /** Start offset of each group in {@link #pattern}; index 0 is the whole expression. */
    private final int[] groupStarts;
    /** End offset (exclusive) of each group in {@link #pattern}; index 0 is the whole expression. */
    private final int[] groupEnds;

    /**
     * @param pattern regular expression to inspect
     * @throws NullPointerException if {@code pattern} is null
     */
    Metamatcher(String pattern){
        this.pattern = pattern;
        TreeMap<Integer,Integer> groups = getGroups();

        // Flatten the sorted map once so that start()/end() are plain array lookups.
        groupStarts = new int[groups.size() + 1];
        groupEnds = new int[groups.size() + 1];
        groupEnds[0] = pattern.length();
        int number = 1;
        for(Integer open : groups.keySet()){
            groupStarts[number] = open;
            groupEnds[number] = groups.get(open);
            number++;
        }
    }

    /**
     * @param group ordinal number of group, 0 for the whole expression
     * @return index in the expression of the group's opening parenthesis (0 for group 0)
     * @throws IndexOutOfBoundsException if there is no group with the given number
     */
    public int start(int group){
        checkGroup(group);
        return groupStarts[group];
    }

    /**
     * @param group ordinal number of group, 0 for the whole expression
     * @return index in the expression just past the group's closing parenthesis
     *         (the expression length for group 0)
     * @throws IndexOutOfBoundsException if there is no group with the given number
     */
    public int end(int group){
        checkGroup(group);
        return groupEnds[group];
    }

    /**
     * @param group ordinal number of group, 0 for the whole expression
     * @return the fragment of the regular expression that defines the given group,
     *         including its enclosing parentheses
     * @throws IndexOutOfBoundsException if there is no group with the given number
     */
    public String group(int group){
        return pattern.substring(start(group), end(group));
    }

    /**
     * @return number of capturing groups within the regular expression (group 0 is not counted)
     */
    public int groupCount(){
        return groupStarts.length - 1;
    }

    /**
     * @return a multi-line listing with the group count followed by one line per group
     *         (including group 0) showing its offsets and its text
     */
    @Override
    public String toString(){
        StringBuilder result = new StringBuilder();
        result.append("Groups count: ")
                .append(groupCount())
                .append("\n");
        for(int i = 0; i <= groupCount(); i++){
            result.append("group(")
                    .append(i).append(") ")
                    .append(start(i))
                    .append("-")
                    .append(end(i))
                    .append("\t")
                    .append(group(i))
                    .append("\n");
        }
        return result.toString();
    }

    /** Rejects group numbers outside 0..groupCount(). */
    private void checkGroup(int group){
        if(group < 0 || group > groupCount()){
            throw new IndexOutOfBoundsException("No group " + group);
        }
    }

    /**
     * Scans the expression once, pairing every opening parenthesis with its closing one.
     *
     * @return map from the index of each capturing group's opening parenthesis to the index
     *         just past its closing parenthesis; iteration order equals group numbering
     */
    private TreeMap<Integer,Integer> getGroups(){
        TreeMap<Integer,Integer> closed = new TreeMap<Integer,Integer>();
        int length = pattern.length();
        // Stack of currently open parentheses: position and whether the group captures.
        int[] openAt = new int[length];
        boolean[] openCaptures = new boolean[length];
        int open = 0;
        int classDepth = 0;

        for(int i = 0; i < length; i++){
            char c = pattern.charAt(i);
            if(c == '\\'){
                if(pattern.startsWith("\\Q", i)){
                    // Everything up to the matching \E is literal text.
                    int quoteEnd = pattern.indexOf("\\E", i + 2);
                    if(quoteEnd < 0){
                        break; // quoted until the end of the expression
                    }
                    i = quoteEnd + 1;
                } else {
                    i++; // the escaped character is a literal, e.g. \( or \[
                }
            } else if(classDepth > 0){
                // Inside [...] parentheses are literals; classes may nest, e.g. [a-z&&[^b]].
                if(c == '['){
                    classDepth++;
                } else if(c == ']'){
                    classDepth--;
                }
            } else if(c == '['){
                classDepth = 1;
            } else if(c == '('){
                openAt[open] = i;
                openCaptures[open] = isCapturingGroup(i);
                open++;
            } else if(c == ')' && open > 0){
                open--;
                if(openCaptures[open]){
                    closed.put(openAt[open], i + 1);
                }
            }
        }

        return closed;
    }

    /**
     * @param open index of an unescaped opening parenthesis in the expression
     * @return true if the parenthesis starts a capturing group: either a plain {@code (...)}
     *         or a named {@code (?<name>...)}; false for {@code (?:}, look-arounds, atomic
     *         groups and inline flags
     */
    private boolean isCapturingGroup(int open){
        int next = open + 1;
        if(next >= pattern.length() || pattern.charAt(next) != '?'){
            return true;
        }
        if(!pattern.startsWith("?<", next) || next + 2 >= pattern.length()){
            return false;
        }
        // "(?<=" and "(?<!" are look-behinds; any other "(?<" introduces a named group.
        char afterAngle = pattern.charAt(next + 2);
        return afterAngle != '=' && afterAngle != '!';
    }
}
import java.util.ArrayList;
导入java.util.TreeMap;
导入java.util.regex.Matcher;
导入java.util.regex.Pattern;
公共类元匹配器{
私有字符串模式;
私有树映射组;
元匹配器(字符串模式){
这个模式=模式;
groupsIndices=getGroups();
}
/**
*@param组组的序号
*@return模式片段的起始索引,包含组捕获
*/
公共整数启动(整数组){
ArrayList索引=新的ArrayList(groupsIndices.keySet());
指数。加上(0,0);
收益率指数.get(组);
}
/**
*@param组组的序号
*@return模式片段的结束索引,包含组捕获
*/
公共int端(int组){
ArrayList索引=新的ArrayList(groupsIndices.values());
index.add(0,pattern.length());
收益率指数.get(组);
}
/**
*@param组组的序号
*@return String对象,包含捕获给定组的正则表达式片段
*/
公共字符串组(int组){
返回模式。子字符串(开始(组),结束(组));
}
/**
*@返回给定正则表达式中捕获组的数目
*/
public int groupCount(){
返回groupsIndices.size();
}
公共字符串toString(){
StringBuilder结果=新建StringBuilder();
result.append(“组计数:”)
.append(groupCount())
.append(“\n”);
对于(int i=0;i

我用各种输入测试过它,并将输出与 regex101 等工具的结果进行了比较,对我来说它运行正常。

首先,您的字符串无效,它包含两个以上的 " 字符。为什么要这样做?您是否正在尝试编写一个正则表达式测试工具?Matcher(可通过
pattern.matcher("input string")
获得)提供了您需要的API(
groupCount()
start(group)
end(group)
),但它作用于
"input string"
,而不是正则表达式本身。不过,要写出一个能匹配各个组的正则表达式并不容易……(需要处理递归,还要排除
(?:
等……)
首先,您的字符串无效,它包含两个以上的 " 字符。为什么要这样做?您是否正在尝试编写一个正则表达式测试工具?Matcher(可通过
pattern.matcher("input string")
获得)提供了您需要的API(
groupCount()
start(group)
end(group)
),但它作用于
"input string"
,而不是正则表达式本身。不过,要写出一个能匹配各个组的正则表达式并不容易……(需要处理递归,还要排除
(?:
等……)