Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/regex/19.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 在CSharp中查找与正则表达式匹配的文本的更好方法?_C#_Regex - Fatal编程技术网

C# 在CSharp中查找与正则表达式匹配的文本的更好方法?

C# 在CSharp中查找与正则表达式匹配的文本的更好方法?,c#,regex,C#,Regex,我希望显示与文本字符串匹配的正则表达式列表 我以日期为例。空格代表其他文本 FindMatchRegex遍历正则表达式列表。 因为我不知道文本在正则表达式中的匹配位置,所以我匹配正则表达式的每个子字符串。 因此,从整个字符串开始,我通过从前面切掉一个字符来逐渐减少正则表达式 我检查它是否是有效的正则表达式,然后使用PCRE正则表达式检查部分匹配或完全匹配。 如果是部分匹配或完全匹配,请将其添加到可能匹配的正则表达式列表中 在.NETFiddle上,对于200个长度的长字符串和200个正则表达式,

我希望显示与文本字符串匹配的正则表达式列表

我以日期为例。空格代表其他文本

FindMatchRegex遍历正则表达式列表。 因为我不知道文本在正则表达式中的匹配位置,所以我匹配正则表达式的每个子字符串。 因此,从整个字符串开始,我通过从前面切掉一个字符来逐渐减少正则表达式 我检查它是否是有效的正则表达式,然后使用PCRE正则表达式检查部分匹配或完全匹配。 如果是部分匹配或完全匹配,请将其添加到可能匹配的正则表达式列表中

在 .NET Fiddle 上,对于长度为200的长字符串和200个正则表达式,这大约在1秒内执行完毕。 在我的桌面电脑上(16GB内存,i5-3570K 3.4GHz),大约需要6秒钟。

我正在寻找大约0.5秒的响应时间。如何使速度提高10倍或100倍

我缺少什么命令或技巧

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using PCRE;
using System.Diagnostics;

/// <summary>
/// Demo program that reports which regular expressions in a list could match
/// a short piece of text typed by the user, and how long the search took.
/// </summary>
public class Program
{
    /// <summary>
    /// Builds a list of 204 patterns (each prefixed with 200 spaces), runs
    /// <see cref="FindMatchRegex"/> against a sample text, and prints every
    /// matching pattern followed by the elapsed time.
    /// </summary>
    public static void Main()
    {
        List<string> regexList = new();
        string longString = new string(' ', 200);
        regexList.Add(longString + @"(\d|\d\d) December (\d\d\d\d) 1066");
        regexList.Add(longString + @"(\d|\d\d) December (\d\d\d\d) 1999");
        regexList.Add(longString + @"(\d|\d\d) December (\d\d\d\d) 2000");
        regexList.Add(longString + @"(\d|\d\d) December (\d\d\d\d) 2020");
        for (int i = 0; i < 200; i++)
        {
            regexList.Add(longString + @"(\d|\d\d) April (\d\d\d\d)");
        }

        // Alternative inputs to experiment with. Only ONE declaration of
        // checkString may be active at a time; a second one is a compile
        // error (CS0128), so the alternatives stay commented out.
        //      string checkString = "1 December 1234 10";
        //      string checkString = "1 December 4567 1";
        //      string checkString = "1 December 1234 20";
        string checkString = "1 December 1234";
        Stopwatch stopwatch = new();
        stopwatch.Start();
        List<string> result = FindMatchRegex(checkString, regexList);
        stopwatch.Stop();
        foreach (var item in result)
        {
            Console.WriteLine(checkString + " found to match " + item);
        }

        Console.WriteLine("Time elapsed: " + stopwatch.Elapsed);
    }

    /// <summary>
    /// Returns every pattern in <paramref name="regexList"/> for which some
    /// suffix of the pattern, anchored with <c>^</c>, matches
    /// <paramref name="filter"/> either partially or fully.
    /// </summary>
    /// <param name="filter">The text to test against each pattern suffix.</param>
    /// <param name="regexList">The candidate patterns.</param>
    /// <returns>The patterns that matched, in their original list order.</returns>
    private static List<string> FindMatchRegex(string filter, List<string> regexList)
    {
        List<string> matchingRegexes = new();
        for (int i = 0; i < regexList.Count; i++)
        {
            string currentRegex = regexList[i];
            bool anyMatches = false;
            int j = 0;

            // The position inside the pattern where the text would start to
            // match is unknown, so every suffix of the pattern is tried,
            // stopping at the first suffix that matches.
            while (j < currentRegex.Length && anyMatches == false)
            {
                string currentRegexSubstring = currentRegex.Substring(j);

                // Cutting a pattern at an arbitrary offset can leave an
                // unbalanced group or bracket; skip suffixes that do not parse.
                if (IsValidRegex(currentRegexSubstring))
                {
                    var regex = new PcreRegex("^" + currentRegexSubstring);
                    var match = regex.Match(filter, PcreMatchOptions.PartialSoft);
                    anyMatches = anyMatches || match.IsPartialMatch || match.Success;
                }

                j++;
            }

            if (anyMatches == true)
            {
                matchingRegexes.Add(currentRegex);
            }
        }

        return matchingRegexes;
    }

    /// <summary>
    /// Reports whether <paramref name="pattern"/> is a non-blank string that
    /// the .NET regex engine accepts.
    /// </summary>
    /// <param name="pattern">The candidate pattern.</param>
    /// <returns>
    /// <c>false</c> for null, empty or whitespace-only input, or when the
    /// pattern causes an <see cref="ArgumentException"/>; otherwise <c>true</c>.
    /// </returns>
    private static bool IsValidRegex(string pattern)
    {
        if (string.IsNullOrWhiteSpace(pattern))
        {
            return false;
        }

        try
        {
            // Running a match against the empty string forces the pattern to be parsed.
            Regex.Match("", pattern);
        }
        catch (ArgumentException)
        {
            return false;
        }

        return true;
    }
}
使用系统;
使用System.Collections.Generic;
使用System.Text.RegularExpressions;
使用PCRE;
使用系统诊断;
公共课程
{
公共静态void Main()
{
List regexList=new();
字符串长字符串=新字符串(“”,200);
regexList.Add(longString+@“(\d\d\d)十二月(\d\d\d)1066”);
regexList.Add(longString+@)(\d\d\d)1999年12月(\d\d\d)日);
regexList.Add(longString+@)(\d\d\d)2000年12月(\d\d\d)日);
regexList.Add(longString+@)(\d\d\d)2020年12月(\d\d\d)日);
对于(int i=0;i<200;i++)
regexList.Add(longString+@“(\d\d\d)April(\d\d\d)”);
string checkString=“1234年12月1日10”;
//string checkString=“4567年12月1日1”;
//string checkString=“1234年12月1日20”;
string checkString=“1234年12月1日”;
秒表秒表=新的();
秒表。开始();
列表结果=FindMatchRegex(检查字符串,regexList);
秒表;
foreach(结果中的var项目)
{
Console.WriteLine(检查字符串+“发现匹配”+项);
}
Console.WriteLine(“经过的时间:+秒表.经过的时间”);
}
私有静态列表FindMatchRegex(字符串筛选器,列表regexList)
{
List matchingRegexes=new();
for(int i=0;i
编辑

节目目的

我正在写一个使用内部翻译的翻译程序。独特的句子匹配正确且容易,但为日期或产品项目描述的微小变化添加新的翻译会让人厌烦。因此,字典包含正则表达式,以匹配要翻译成语言的英语。非常适合在翻译过程中不发生变化的日期和产品项目

当用户想要更新翻译,而不是整个英语的类型时,他们可以只键入英语的一部分来隔离要更新的翻译。因此,我想从字典中筛选术语列表,为用户提供匹配术语的下拉列表

例如,如果我键入“31 December 2020”,我需要一个与“31 December 2020”匹配的所有英语术语的列表;但如果词典使用的是正则表达式“…(\d|\d\d) December (\d\d\d\d)…”,那么按纯文本比较它不会匹配。我想扫描字典,使所有包含正则表达式“(\d|\d\d) December (\d\d\d\d)”的英语术语也能匹配。

我是不是用错误的方法来解决这个问题

编辑

要翻译的字符串示例

由于第18c节的缺陷通知,ABC部分已于2010年7月21日被XYZ部分替换

由于缺陷通知第17b节,零件DEF已于2009年7月15日替换为零件RST

由于第15a节的缺陷通知,零件DEF已于2008年7月15日被零件RST替换

正则表达式来翻译字符串,我现在有大约200个,预计会增加

Part ([A-Z][A-Z][A-Z]) has been replaced by part ([A-Z][A-Z][A-Z]) on (\d\d) July (\d\d\d\d) because of defect notice section (\d\d[a-z])

翻译

由于缺陷注释第5节,语言部分$1语言已在$3语言7月$4语言被第2部分替换为$3语言

匹配字符串并翻译它们的功能运行良好。如果在校对过程中,我们收到通知,“由于缺陷通知第18c节,ABC部分已于2010年7月21日被XYZ部分替换”的译文是错误的,则用户可以键入“由于缺陷通知第18c节,ABC部分已被XYZ部分替换”,程序可以显示“Part ([A-Z][A-Z][A-Z]) has been replaced by part ([A-Z][A-Z][A-Z]) on (\d\d) July (\d\d\d\d) because of defect notice section (\d\d[a-z])”作为可能匹配的英语术语,然后用户就可以去编辑该翻译。

有些时候,我们看到的文本有轻微的错误,或拼写错误或额外的标点符号
/// <summary>
/// Times a run that builds a list of sentence patterns, filters it through
/// CleanRegexList, searches it with FindMatchRegex, and prints the matches
/// followed by the elapsed time.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var timer = new Stopwatch();
    timer.Start();

    // The pattern strings would normally come from a file or a database.
    var patterns = new List<string>
    {
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 1066 because of defect notice section (\d{1,2}[a-z])",
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 1999 because of defect notice section (\d{1,2}[a-z])",
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 2000 because of defect notice section (\d{1,2}[a-z])",
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 2020 because of defect notice section (\d{1,2}[a-z])",
    };

    // Ten identical filler entries that use April instead of July.
    int fillerCount = 10;
    while (fillerCount-- > 0)
    {
        patterns.Add(@"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) April (\d{4}) 2020 because of defect notice section (\d{1,2}[a-z])");
    }

    // Drop unusable patterns up front unless the list is already kept clean.
    List<string> usablePatterns = CleanRegexList(patterns);

    string checkString = "1 July 2000 1"; // Expect 2 results
    //string checkString = "1 July 2000 19"; // Expect 1 result
    //string checkString = "1 July 2000 20"; // Expect 2 results
    //string checkString = "1 July 2000 202"; // Expect 1 result
    //string checkString = "1 July 2000"; // Expect 4 results

    List<string> matches = FindMatchRegex(checkString, usablePatterns);
    timer.Stop();

    for (int k = 0; k < matches.Count; k++)
    {
        Console.WriteLine(checkString + " found to match " + matches[k]);
    }

    Console.WriteLine("Time elapsed: " + timer.Elapsed);
}
// NOTE(review): Process.ExtractTop is not defined in the visible source; it is
// presumably a fuzzy string-matching library call (e.g. FuzzySharp) - confirm.
// The call passes a sample sentence, the regexList collection, and limit: 10.
Process.ExtractTop(
    "Part ABC has been replaced by part XYZ on 21 July 2010 because of defect notice section 18c",
    regexList,
    limit: 10);
(\d|\d\d) December (\d\d\d\d) 2020
(\d|\d\d) December (\d\d\d\d) 1066
(\d{1,2}) December (\d{4}) 1066
1 December 2020 found to match (\d{1,2}) December (\d{4})
Time elapsed: 00:00:00.0083255
/// <summary>
/// Times a run that builds one December pattern plus 200 April patterns,
/// filters the list through CleanRegexList, searches it with FindMatchRegex,
/// and prints the matches followed by the elapsed time.
/// </summary>
public static void Main()
{
    var timer = new Stopwatch();
    timer.Start();

    // The pattern strings would normally come from a file or a database.
    var patterns = new List<string> { @"(\d{1,2}) December (\d{4})" };
    int remaining = 200;
    while (remaining-- > 0)
    {
        patterns.Add(@"(\d{1,2}) April (\d{4})");
    }

    // Drop unusable patterns up front unless the list is already kept clean.
    List<string> usablePatterns = CleanRegexList(patterns);

    string checkString = "1 December 2020";
    // string checkString = "15 April 2020";

    List<string> matches = FindMatchRegex(checkString, usablePatterns);
    timer.Stop();

    for (int k = 0; k < matches.Count; k++)
    {
        Console.WriteLine(checkString + " found to match " + matches[k]);
    }

    Console.WriteLine("Time elapsed: " + timer.Elapsed);
}
/// <summary>
/// Collects the patterns from <paramref name="regexList"/> that match
/// <paramref name="checkString"/>.
/// </summary>
/// <param name="checkString">The text each pattern is tested against.</param>
/// <param name="regexList">The pattern strings to try, in order.</param>
/// <returns>The matching pattern strings, in their original order.</returns>
private static List<string> FindMatchRegex(string checkString, List<string> regexList)
{
    var hits = new List<string>();

    for (int index = 0; index < regexList.Count; index++)
    {
        string candidate = regexList[index];
        var compiled = new Regex(candidate);
        if (!compiled.IsMatch(checkString))
        {
            continue;
        }

        hits.Add(candidate);
    }

    return hits;
}

/// <summary>
/// Produces a new list holding only the entries of
/// <paramref name="regexList"/> that IsValidRegex accepts.
/// </summary>
/// <param name="regexList">The pattern strings to filter.</param>
/// <returns>A new list of the accepted patterns, in their original order.</returns>
private static List<string> CleanRegexList(List<string> regexList)
{
    // FindAll copies the elements that satisfy the predicate into a fresh list.
    return regexList.FindAll(IsValidRegex);
}
/// <summary>
/// Builds the pattern list, precomputes the suffix dictionary with
/// FindAllRegexStrings, then times the setup and the lookup separately and
/// prints both timings followed by the matches.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var timer = new Stopwatch();
    timer.Start();

    // The pattern strings would normally come from a file or a database.
    var patterns = new List<string>
    {
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 1066 because of defect notice section (\d{1,2}[a-z])",
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 1999 because of defect notice section (\d{1,2}[a-z])",
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 2000 because of defect notice section (\d{1,2}[a-z])",
        @"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) July (\d{4}) 2020 because of defect notice section (\d{1,2}[a-z])",
    };

    // Each April entry embeds its loop index so that it is a distinct
    // dictionary key.
    int n = 0;
    while (n < 10)
    {
        patterns.Add(@"Part ([A-Z]{3}) has been replaced by part ([A-Z]{3}) on (\d{1,2}) April (\d{4}) " + n.ToString() + @" because of defect notice section (\d{1,2}[a-z])");
        n++;
    }

    // Precomputed once; intended to be stored and reused between lookups.
    Dictionary<string, List<string>> suffixesByPattern = FindAllRegexStrings(patterns);

    string checkString = "1 July 2000 1"; // Expect 2 results
    //string checkString = "1 July 2000 19"; // Expect 1 result
    //string checkString = "1 July 2000 20"; // Expect 2 results
    //string checkString = "1 July 2000 202"; // Expect 1 result
    //string checkString = "1 July 2000"; // Expect 4 results

    timer.Stop();
    Console.WriteLine("Setting up time elapsed: " + timer.Elapsed);

    timer.Reset();
    timer.Start();

    List<string> matches = FindMatchRegex(checkString, suffixesByPattern);

    timer.Stop();
    Console.WriteLine("Finding regex time elapsed: " + timer.Elapsed);

    for (int k = 0; k < matches.Count; k++)
    {
        Console.WriteLine(checkString + " found to match " + matches[k]);
    }
}

/// <summary>
/// Returns the keys of <paramref name="regexDictionary"/> for which at least
/// one of the associated pattern strings, prefixed with <c>^</c>, yields a
/// partial or successful PCRE match against <paramref name="checkString"/>.
/// </summary>
/// <param name="checkString">The text to test.</param>
/// <param name="regexDictionary">
/// Maps each key to the list of pattern strings to try for that key.
/// </param>
/// <returns>The keys that matched, in dictionary enumeration order.</returns>
private static List<string> FindMatchRegex(string checkString, Dictionary<string, List<string>> regexDictionary)
{
    var hits = new List<string>();

    foreach (KeyValuePair<string, List<string>> entry in regexDictionary)
    {
        foreach (string suffix in entry.Value)
        {
            var anchored = new PcreRegex("^" + suffix);
            var outcome = anchored.Match(checkString, PcreMatchOptions.PartialSoft);

            if (outcome.IsPartialMatch || outcome.Success)
            {
                // One matching pattern is enough; record the key once and
                // move on to the next entry.
                hits.Add(entry.Key);
                break;
            }
        }
    }

    return hits;
}

/// <summary>
/// Builds a dictionary that maps every pattern in
/// <paramref name="regexList"/> to the list ValidRegexes returns for it.
/// </summary>
/// <param name="regexList">The patterns to index.</param>
/// <returns>A dictionary keyed by the original pattern strings.</returns>
private static Dictionary<string, List<string>> FindAllRegexStrings(List<string> regexList)
{
    var suffixesByPattern = new Dictionary<string, List<string>>();

    foreach (string pattern in regexList)
    {
        List<string> usableSuffixes = ValidRegexes(pattern);

        // Add (rather than the indexer) is used so that a repeated pattern
        // is rejected with an exception instead of being overwritten.
        suffixesByPattern.Add(pattern, usableSuffixes);
    }

    return suffixesByPattern;
}

/// <summary>
/// Lists every suffix of <paramref name="regexString"/> (from the full string
/// down to its last character) that IsValidRegex accepts.
/// </summary>
/// <param name="regexString">The pattern whose suffixes are examined.</param>
/// <returns>The accepted suffixes, longest first.</returns>
private static List<string> ValidRegexes(string regexString)
{
    var accepted = new List<string>();
    int start = 0;

    while (start < regexString.Length)
    {
        string suffix = regexString.Substring(start);
        start++;

        if (!IsValidRegex(suffix))
        {
            continue;
        }

        accepted.Add(suffix);
    }

    return accepted;
}

/// <summary>
/// Reports whether <paramref name="pattern"/> is a non-blank string that the
/// .NET regex engine accepts.
/// </summary>
/// <param name="pattern">The candidate pattern.</param>
/// <returns>
/// <c>false</c> for null, empty or whitespace-only input, or when the pattern
/// causes an <see cref="ArgumentException"/>; otherwise <c>true</c>.
/// </returns>
private static bool IsValidRegex(string pattern)
{
    if (!string.IsNullOrWhiteSpace(pattern))
    {
        try
        {
            // Constructing the Regex parses the pattern; a malformed pattern
            // surfaces as an ArgumentException.
            _ = new Regex(pattern);
            return true;
        }
        catch (ArgumentException)
        {
            // Fall through to the shared "not valid" result below.
        }
    }

    return false;
}