C# 使用“拆分”删除由卷曲双引号包围的字符串部分_C#_Split

C# 使用“拆分”删除由卷曲双引号包围的字符串部分

C# 使用“拆分”删除由卷曲双引号包围的字符串部分,c#,split,C#,Split,我将以下内容用作C#中的split函数的参数：它适用于所有的“括号”，但不适用于“卷曲双引号”。我不明白为什么。这是拆分中的错误，还是卷曲引号字符的特性我将以下内容作为输入字符串： “这是一个前评论”，然后是一些重要信息[可能有一个嵌入的评论]，然后是一些更重要的信息（然后是一个后评论）我希望去掉注释，但将它们捕获到一个结构中，留下一个干净的信息字符串。在我尝试添加卷曲的双引号作为额外的分隔符之前，所有这些都用括号很好地工作了。。。（我知道嵌入的评论被故意收集为帖子评论）我写的代码如下

我将以下内容用作C#中的

split

函数的参数：

它适用于所有的“括号”，但不适用于“卷曲双引号”。我不明白为什么。这是

拆分（Split）本身的缺陷，还是卷曲引号字符的某种特性？
我将以下内容作为输入字符串：
“这是一个前评论”，然后是一些重要信息[可能有一个嵌入的评论]，然后是一些更重要的信息（然后是一个后评论）
我希望去掉注释，但将它们捕获到一个结构中，留下一个干净的信息字符串。在我尝试添加卷曲的双引号作为额外的分隔符之前，所有这些都用括号很好地工作了。。。
（我知道嵌入的评论会被有意地归入后评论）
我写的代码如下：
/// <summary>
/// Separates a string into its plain text and the delimited comments embedded in it.
/// The input is split on the start delimiters; each resulting segment must then contain
/// exactly one end delimiter, giving a comment followed by more plain text.
/// </summary>
class CommentSplit
{
    /// <summary>True only when at least one start delimiter was found and every segment parsed cleanly.</summary>
    public bool split = false;
    /// <summary>True when a segment after a start delimiter did not contain exactly one end delimiter.</summary>
    public bool error = false;
    /// <summary>The input string as passed in ("" when the input was null).</summary>
    public string original = "";
    /// <summary>The text outside comments, plus any comment matched by addStrings, joined by single spaces.</summary>
    public string remainder = "";
    /// <summary>Comments found while <see cref="remainder"/> was still empty (only filled when usePre is true).</summary>
    public string preComment = "";
    /// <summary>All other kept comments, joined by single spaces.</summary>
    public string postComment = "";

    /// <summary>
    /// Parses <paramref name="inString"/> and fills the public fields.
    /// On a malformed segment <see cref="error"/> is set and parsing stops; the fields keep
    /// whatever had been collected up to that point.
    /// </summary>
    /// <param name="inString">Text to parse; null leaves every field at its default.</param>
    /// <param name="startComment">Characters that open a comment.</param>
    /// <param name="endComment">Characters that close a comment.</param>
    /// <param name="ignoreStrings">Comments to drop entirely (compared ignoring case); may be null.</param>
    /// <param name="addStrings">Comments to keep as part of the remainder (compared ignoring case); may be null.</param>
    /// <param name="usePre">When true, comments seen before any plain text go to preComment instead of postComment.</param>
    public CommentSplit(string inString, char[] startComment, char[] endComment, string[] ignoreStrings, string[] addStrings, bool usePre) // creator
    {
        if (inString == null)
        {
            return;
        }
        original = inString;

        string[] starts = inString.Split(startComment);
        if (starts.Length == 1)
        {
            // No start delimiter at all: the whole input is plain text.
            remainder = inString;
            return;
        }
        if (starts[0] != "")
        {
            remainder += starts[0].TrimEnd();
        }

        for (int i = 1; i < starts.Length; i++)
        {
            // Each segment must look like "<comment><end delimiter><text>".
            string[] ends = starts[i].Split(endComment);
            if (ends.Length != 2) // missing end delimiter, or more than one for a single start
            {
                error = true;
                return;
            }

            string comment = ends[0];
            if (ContainsIgnoreCase(addStrings, comment))
            {
                // Was a comment, but the caller wants it kept in the remainder.
                remainder = AppendWithSpace(remainder, comment);
            }
            else if (!ContainsIgnoreCase(ignoreStrings, comment))
            {
                if (remainder == "" && usePre)
                {
                    preComment = AppendWithSpace(preComment, comment);
                }
                else
                {
                    postComment = AppendWithSpace(postComment, comment);
                }
            }

            // Empty trailing text is skipped so the remainder never gains a stray space.
            remainder = AppendWithSpace(remainder, ends[1].Trim());
        }
        split = true;
    } // CommentSplit

    // Returns true when value equals one of the candidates, ignoring case; a null list matches nothing.
    private static bool ContainsIgnoreCase(string[] candidates, string value)
    {
        if (candidates == null)
        {
            return false;
        }
        for (int z = 0; z < candidates.Length; z++)
        {
            // Ordinal comparison avoids the culture-dependent results of ToLower().
            if (string.Equals(candidates[z], value, System.StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
        return false;
    }

    // Joins target and piece with exactly one space; an empty side contributes nothing.
    private static string AppendWithSpace(string target, string piece)
    {
        if (piece == "")
        {
            return target;
        }
        if (target == "")
        {
            return piece;
        }
        return target + " " + piece;
    }
}

类注释拆分
{
公共布尔分割=假；
公共布尔错误=false；
公共字符串original=“”；
公共字符串余数=”；
公共字符串预注释=”；
公共字符串postComment=“”；
public CommentSplit（string inString，char[]startComment，char[]endComment，string[]ignoreStrings，string[]addStrings，bool usePre）//creator
{
if（inString==null）
返回；
原始=安装；
string[]start=inString.Split（startComment）；
if（start.Length==1）
{
余数=安装；
返回；
}
如果（开始[0]！=“”）
余数+=开始[0]。修剪结束（）；
对于（int i=1；i

我应该说明，我是一名退休的C程序员，刚开始涉足C#，因此我的代码风格可能不够面向对象，也不够高效。我最初确实包含了直（非卷曲）双引号，但它们并不重要；事实上，由于直双引号没有区分起始和结束的两种定界符形式，所以我只是把它们硬塞进了代码中。
问题一定出在代码的其他地方，因为下面这个最小可验证示例运行正常：
// Every character that opens or closes a comment. '\u201C' and '\u201D' are the curly
// double quotes (the same characters as the literals in the string below), so each one
// needs to appear in the list only once.
char[] delimiterComment = { '(', '{', '[', '\u201C', '\u201D', '"', '}', ']', ')' };
string stringWithComment = "this has a “COMMENT” yeah really";
// Split cuts at every delimiter occurrence, so the quoted text becomes its own element.
string[] result = stringWithComment.Split(delimiterComment);
//Output:
//result[0] = "this has a "
//result[1] = "COMMENT"
//result[2] = " yeah really"

所以你想删掉评论，例如
在这种情况下，您可以尝试一个简单的循环：
//TODO: I suggest combining starts and ends into array of pairs, e.g.
// KeyValuePair<string,string>[]
// Returns source with every delimited comment (delimiters included) removed.
// starts[i] opens a comment that only ends[i] can close; a null or empty source
// is returned unchanged.
private static string CutOffComments(string source, char[] starts, char[] ends) {
  if (string.IsNullOrEmpty(source))
    return source;

  StringBuilder kept = new StringBuilder(source.Length);

  // Index into starts/ends of the comment currently open, or -1 when outside a comment.
  int openDelimiter = -1;

  for (int i = 0; i < source.Length; i++) {
    char current = source[i];

    if (openDelimiter < 0) {
      // Outside a comment: either this character opens one, or it is kept.
      openDelimiter = Array.IndexOf(starts, current);

      if (openDelimiter < 0)
        kept.Append(current);

      continue;
    }

    // Inside a comment: drop everything until the matching end delimiter.
    if (current == ends[openDelimiter])
      openDelimiter = -1;
  }

  //TODO:
  // if (openDelimiter >= 0) // dangling comment, e.g. 123[456

  return kept.ToString();
}

只需在单引号之间插入双引号，而不使用转义字符：
输入：
string s = "abc(121), {12}, \" HI \"";
// Split already returns the pieces as a string[]; Join prints them one per line.
Console.WriteLine(string.Join(Environment.NewLine, s.Split(delimiterComment)));

abc
121), 
12}, 
 HI

输出：
string s = "abc(121), {12}, \" HI \"";
// Split already returns the pieces as a string[]; Join prints them one per line.
Console.WriteLine(string.Join(Environment.NewLine, s.Split(delimiterComment)));

abc
121), 
12}, 
 HI

您能给出一个示例输入和预期输出吗？在您进行拆分的地方提供更多的代码。您能提供一些所需行为的示例吗？例如，我所做的
string s = "abc(121), {12}, \" HI \"";
// Split already returns the pieces as a string[]; Join prints them one per line.
Console.WriteLine(string.Join(Environment.NewLine, s.Split(delimiterComment)));

abc
121), 
12}, 
 HI