Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/321.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/.net/24.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 捕获文本的正则表达式_C#_.net_Regex_Linq - Fatal编程技术网

C# 捕获文本的正则表达式

C# 捕获文本的正则表达式,c#,.net,regex,linq,C#,.net,Regex,Linq,我有一个包含以下内容的日志文件: 2012-07-16 03:20:4123796160897,Text,id:SAR-23796160897-c0-2-1 sub:000 dlvrd:001提交日期:120715220216完成日期:120716032038 stat:deliverd err:000 Text:,FOTSO TOKAM,SMSCReceiptMsgId=SAR-23796160897-c0-2-1 2012-07-16 03:20:4823796160897,Text,id:

我有一个包含以下内容的日志文件:

2012-07-16 03:20:4123796160897,Text,id:SAR-23796160897-c0-2-1 sub:000 dlvrd:001提交日期:120715220216完成日期:120716032038 stat:deliverd err:000 Text:,FOTSO TOKAM,SMSCReceiptMsgId=SAR-23796160897-c0-2-1 2012-07-16 03:20:4823796160897,Text,id:SAR-23796160897-c0-2-2 sub:000 dlvrd:001提交日期:120715220216完成日期:120716032045 stat:deliverd err:000 Text:,FOTSO TOKAM,SMSCReceiptMsgId=SAR-23796160897-c0-2-2 2012-05-04 00:07:4623777603300,文本,id:4FA23EB0 sub:000 dlvrd:001提交日期:120503225018完成日期:120504000744 stat:deliverd err:000文本:,FLP,SMSCReceiptMsgId=4FA23EB0 2012-05-04 01:50:1823796726987,文本,id:4FA23E95 sub:000 dlvrd:001提交日期:120503225014完成日期:120504015016状态:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA23E95 2012-05-04 01:50:222379757015,文本,id:4FA23EB2 sub:000 dlvrd:001提交日期:120503225018完成日期:120504015021状态:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA23EB2 2012-05-04 01:50:4823799907239,文本,id:4FA23F38 sub:000 dlvrd:001提交日期:120503225042完成日期:120504015046状态:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA23F38 2012-05-04 01:50:4823799896455,文本,id:4FA23D1C sub:000 dlvrd:001提交日期:120503175232完成日期:120504015046状态:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA23D1C 2012-05-04 01:50:4823799896455,文本,id:4FA23F04 sub:000 dlvrd:001提交日期:120503225031完成日期:120504015046统计:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA23F04 2012-05-04 01:50:5023794105044,文本,id:4FA23F55 sub:000 dlvrd:001提交日期:120503225046完成日期:120504015048状态:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA23F55 2012-05-04 01:51:1923796029764,Text,id:4FA23FEE sub:000 dlvrd:001提交日期:120503225114完成日期:120504015117 stat:deliverd err:000 Text:,FLP,SMSCReceiptMsgId=4FA23FEE 2012-05-04 02:17:5123775461594,文本,id:4FA24025 sub:000 dlvrd:001提交日期:120503225125完成日期:120504021749状态:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA24025 2012-05-04 04:08:0223777437781,文本,id:4FA23F23 sub:000 dlvrd:001提交日期:120503225037完成日期:120504040800状态:交付错误:000文本:,FLP,SMSCReceiptMsgId=4FA23F23 2012-05-04 04:50:1223777970013,文本,id:4FA23E70 sub:000 dlvrd:000提交日期:12050322505完成日期:120504045011状态:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23E70 2012-05-04 
04:50:1523775182832,文本,id:4FA23E7E sub:000 dlvrd:000提交日期:120503225008完成日期:120504045014统计:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23E7E 2012-05-04 04:50:172377789644,文本,id:4FA23E80 sub:000 dlvrd:000提交日期:120503225010完成日期:120504045016状态:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23E80 2012-05-04 04:50:2123777529371,文本,id:4FA23E8F sub:000 dlvrd:000提交日期:120503225013完成日期:120504045019统计:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23E8F 2012-05-04 04:50:2123777613852,文本,id:4FA23E97 sub:000 dlvrd:000提交日期:120503225014完成日期:120504045020状态:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23E97 2012-05-04 04:50:2423777407598,文本,id:4FA23EAE sub:000 dlvrd:000提交日期:120503225017完成日期:120504045023状态:过期错误:032文本:,FLP,SMSCReceiptMsgId=4FA23EAE 2012-05-04 04:50:2623777736950,文本,id:4FA23EAF sub:000 dlvrd:000提交日期:120503225018完成日期:120504045024状态:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23EAF 2012-05-04 04:50:3123775834128,文本,id:4FA23ED6 sub:000 dlvrd:000提交日期:120503225024完成日期:120504045030状态:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23ED6 2012-05-04 04:50:3623777486441,文本,id:4FA23EF3 sub:000 dlvrd:000提交日期:120503225029完成日期:120504045035统计:过期错误:027文本:,FLP,SMSCReceiptMsgId=4FA23EF3 现在,我想通过使用正则表达式和c#.net和LINQ,从一些特定字段(如“id,done date,stat”)的内容中获取值


如果有人有任何想法,请帮助我。

可能 CSV 解析器会更好,但您可以使用下面的正则表达式,并把 id: 替换为所需的其他字段。例如:
done date:(?<donedate>.*?)\s

// Pulls the "id", "done date" and "stat" fields out of a log line.
// Each named group captures lazily up to the next whitespace character.
// RegexOptions.Multiline only affects ^ and $, which this pattern does not use;
// it is kept to match the original options.
string strRegex = @"id:(?<id>.*?)\s.*?done date:(?<donedate>.*?)\s.*?stat:(?<stat>.*?)\s";
RegexOptions myRegexOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
Regex myRegex = new Regex(strRegex, myRegexOptions);
string strTargetString = @"2012-07-16 03:20:41,23796160897,Text,id:SAR-23796160897-c0-2-1 sub:000 dlvrd:001 submit date:120715220216 done date:120716032038 stat:DELIVRD err:000 text:,FOTSO TOKAM,SMSCReceiptMsgId=SAR-23796160897-c0-2-1";

// Matches() only yields successful matches, so no Success check is needed here.
foreach (Match myMatch in myRegex.Matches(strTargetString))
{
    string id = myMatch.Groups["id"].Value;
    string doneDate = myMatch.Groups["donedate"].Value;
    string stat = myMatch.Groups["stat"].Value;

    // Add your code here, using id / doneDate / stat.
}
// Pulls the "id", "done date" and "stat" fields out of a log line.
// Each named group captures lazily up to the next whitespace character.
string strRegex = @"id:(?<id>.*?)\s.*?done date:(?<donedate>.*?)\s.*?stat:(?<stat>.*?)\s";
RegexOptions myRegexOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
Regex myRegex = new Regex(strRegex, myRegexOptions);
string strTargetString = @"2012-07-16 03:20:41,23796160897,Text,id:SAR-23796160897-c0-2-1 sub:000 dlvrd:001 submit date:120715220216 done date:120716032038 stat:DELIVRD err:000 text:,FOTSO TOKAM,SMSCReceiptMsgId=SAR-23796160897-c0-2-1";

// Matches() only yields successful matches, so no Success check is needed here.
foreach (Match myMatch in myRegex.Matches(strTargetString))
{
    string id = myMatch.Groups["id"].Value;
    string doneDate = myMatch.Groups["donedate"].Value;
    string stat = myMatch.Groups["stat"].Value;

    // Add your code here, using id / doneDate / stat.
}

您可以使用一个regex
id:(?<id>.*?)\s.*?done date:(?<donedate>.*?)\s.*?stat:(?<stat>.*?)\s
然后使用组访问,如
myMatch.Groups["id"].Value

我认为您的regex在这方面帮不了你多少忙。相反,您应该将行拆分为行,然后再拆分为列,因为我可以看到数据可以分割为矩阵,从中可以轻松提取您正在查找的信息。。。甚至可以用JavaScript/C#/Java或任何语言来实现这一点

在我的实践中,这样做:

  • 将数据拆分为行
  • 将行拆分为列
  • 然后遍历每一行并指向要查找的列

    // Split the raw text into lines, then each line into comma-separated columns.
    // NOTE(review): `data` is assumed to be a string holding the whole log text — confirm against the caller.
    var content = data.Split('\n');
    foreach (var line in content)
    {
        var cols = line.Split(',');

        // Blank or short lines have fewer than three columns; skip them so the
        // indexing below cannot throw IndexOutOfRangeException.
        if (cols.Length < 3)
        {
            continue;
        }

        var c1 = cols[0];
        var c2 = cols[1];
        var c3 = cols[2];
    }
    

你可以根据自己的需要修改以上摘录。。。这是最好的办法

不清楚所有字段的含义,或者分隔符是否为常量。使用您提供的测试数据,可以将大部分信息放入命名的组中

/// <summary>
/// Matches one log line of the form
/// <c>date,number,text,id:... sub:... dlvrd:... submit date:... done date:... stat:... err:... </c>
/// and exposes the fields through the named groups <c>Date</c>, <c>Number</c>,
/// <c>Text1</c>, <c>ID</c>, <c>Sub</c>, <c>Dlvrd</c>, <c>SubmitDate</c>,
/// <c>DoneDate</c>, <c>Status</c> and <c>Error</c>.
/// </summary>
/// <remarks>
/// The pattern was originally generated with Expresso 3.0.4334 (http://www.ultrapico.com).
/// It is anchored at the start of a line (<see cref="RegexOptions.Multiline"/>).
/// The first three fields are "anything but a comma"; <c>ID</c> is any run of
/// non-whitespace; <c>Error</c> is digits only; the remaining fields are word characters.
/// A whitespace character is required after every space-separated field, including <c>err:</c>.
/// <see cref="RegexOptions.IgnorePatternWhitespace"/> is set, so the line breaks and
/// indentation inside the pattern are layout only; literal whitespace in the input is
/// matched with <c>\s</c>.
/// </remarks>
public static readonly Regex regex = new Regex(
    @"^(?<Date>[^,]+),
      (?<Number>[^,]+),
      (?<Text1>[^,]+),
      id:(?<ID>[^\s]+)\s
      sub:(?<Sub>\w+)\s
      dlvrd:(?<Dlvrd>\w+)\s
      submit\sdate:(?<SubmitDate>\w+)\s
      done\sdate:(?<DoneDate>\w+)\s
      stat:(?<Status>\w+)\s
      err:(?<Error>\d+)\s",
    RegexOptions.Multiline
    | RegexOptions.ExplicitCapture
    | RegexOptions.CultureInvariant
    | RegexOptions.IgnorePatternWhitespace
    | RegexOptions.Compiled
    );
我个人建议您将测试限制为一行数据,并将其称为:

// Run the pattern against a single line of input.
Match match = regex.Match(inputLineOfData);
这意味着您可以:

// Read the captured fields only when the line matched the pattern.
if (match.Success)
{
    GroupCollection groups = match.Groups;

    string id = groups["ID"].Value;
    string submitDate = groups["SubmitDate"].Value;  // raw text; parse to DateTime
    string doneDate = groups["DoneDate"].Value;      // raw text; parse to DateTime

    // The 'sub', 'dlvrd', 'Status' and 'Error' groups can be read the same way.
}

你想用哪种语言?你要用哪种正则表达式引擎
// Run the pattern against a single line of input and, on success,
// read the captured fields by group name.
Match match = regex.Match(inputLineOfData);
if (match.Success)
{
    GroupCollection groups = match.Groups;

    string id = groups["ID"].Value;
    string submitDate = groups["SubmitDate"].Value;  // raw text; parse to DateTime
    string doneDate = groups["DoneDate"].Value;      // raw text; parse to DateTime

    // The 'sub', 'dlvrd', 'Status' and 'Error' groups can be read the same way.
}