C# c字符串处理中的错误是什么？_C#_Regex_String

C# 字符串处理中的错误是什么？

c# regex string

C# c字符串处理中的错误是什么？,c#,regex,string,C#,Regex,String,下面是一个示例HTML文档，我将其作为MemoryStream在下面给出的代码中传递 <h5>Sample Document </h5> <h3> Present Tense </h3> </p><p>The present tense is just as you have learned. You take the dictionary form of a verb, drop the 다, a

下面是一个示例HTML文档，我将其作为MemoryStream在下面给出的代码中传递

<h5>Sample Document </h5>
<h3> Present Tense </h3>

</p><p>The present tense is just as you have learned.  You take the dictionary form of a verb, drop the &#45796;, add the appropriate ending.

</p><p>&#47673;&#45796; - &#47673; + &#50612;&#50836; = &#47673;&#50612;&#50836; <br />
&#47560;&#49884;&#45796; - &#47560;&#49884; + &#50612;&#50836; - &#47560;&#49884;&#50612;&#50836; - &#47560;&#49492;&#50836;. <br />

</p><p>This tense is used to represent what happens in the present.  I eat. I drink.  It is a general term for the present.

发布的源代码似乎不完整。TestByteOffset从未被调用。FileName、mshtml、_body、myRange、intCount和OccurenceNo均未定义。docContent在Main中从未被引用，其他可能包含原始文本的变量（fs、br、bit、Mr）也同样没有被引用。CompleteRange似乎必须包含该文本，但它只能通过_body获得，而_body并未定义。

请检查以确保您发布的代码完整且可以编译

如果代码没有Fredrik Mörk提到的那么多不必要的操作，那么调试代码就会容易得多。这不仅仅是一个使用适当方式的问题；如果代码很难理解，那么调试就困难得多。

因此，您先获得一个表示文件的流，将其读入字节数组，再从该字节数组创建一个新流并传递给ReadDocument，而ReadDocument又把它读回字节数组。为什么不直接将最初的文件流传递给ReadDocument呢？或者更好的做法是，将FileStream的内容读入一个字符串，然后直接对该字符串进行操作？我理解您的观点，但我想我做的是同一件事，只是方式不太正确。首先，删除代码中的空catch块。您可以在catch块中写一条 throw; 语句来重新抛出异常。空的catch会默默吞掉所有异常，这意味着您既不知道代码是否正常工作，也不知道问题发生在哪里、原因是什么。如果之后还需要继续执行后续操作，您甚至可以在catch块中记录错误日志。虽然我不明白为什么会这样……我的同事们经常忽略这一点……谢天谢地，这类问题通常在部署之前就被发现了。

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace MultiByteStringHandling
{
    class Program
    {
        
        /// <summary>
        /// Reads the file named by <c>FileName</c> into memory, passes it to <c>ReadDocument</c>,
        /// then scans the HTML of <c>_body</c>'s text range for occurrences of
        /// <c>myRange.htmlText</c> and computes the UTF-8 byte offset of the occurrence whose
        /// running count equals <c>OccurenceNo</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <remarks>
        /// <c>FileName</c>, <c>_body</c>, <c>myRange</c>, <c>intCount</c> and <c>OccurenceNo</c>
        /// are not declared in this method; they must be supplied by the enclosing class.
        /// Exceptions are allowed to propagate unchanged so their original stack trace is kept.
        /// </remarks>
        static void Main(string[] args)
        {
            // Release the file handle as soon as the bytes have been read.
            byte[] bit;
            using (FileStream fs = new FileStream(FileName, FileMode.Open))
            using (BinaryReader br = new BinaryReader(fs))
            {
                bit = br.ReadBytes((int)fs.Length);
            }

            using (MemoryStream Mr = new MemoryStream(bit))
            {
                // NOTE(review): ReadDocument is declared as an instance method in this file,
                // so this call from a static method needs an instance (or ReadDocument must
                // become static) before it will compile.
                ReadDocument(Mr);
            }

            mshtml.IHTMLTxtRange CompleteRange =
                              _body.createTextRange().duplicate();
            int intByteOffset = 0;

            // Pattern: the search text (plus optional trailing whitespace) appearing
            // between a '>' and the next '<', i.e. inside text that sits between tags.
            // NOTE(review): regSearchText is concatenated into the pattern unescaped, so any
            // regex metacharacters it contains are treated as pattern syntax - confirm
            // whether Regex.Escape is wanted here.
            string regSearchText = myRange.htmlText;
            string strTemp = regSearchText + "\\s*";
            string strExp = ">(([^<])*?)" + strTemp + "(([^<])*?)<";
            string _cleanedSource = CompleteRange.htmlText;

            foreach (Match m in Regex.Matches(_cleanedSource, strExp,
                   RegexOptions.IgnoreCase))
            {
                // Re-scan the outer match to locate each occurrence of the search text in it.
                foreach (Match m2 in Regex.Matches(m.Value, strTemp,
                        RegexOptions.IgnoreCase))
                {
                    // Increment counter when finding a match.
                    intCount += 1;

                    // If counter matches occurrence number, compute the source offset.
                    if (intCount == OccurenceNo)
                    {
                        // Character offset = index of the outer match + index of the
                        // inner match within it.
                        int intCharOffset = m.Index + m2.Index;

                        // Byte offset = number of UTF-8 bytes needed to encode every
                        // character of _cleanedSource that precedes the occurrence.
                        intByteOffset = System.Text.Encoding.UTF8.GetByteCount(
                            _cleanedSource.Substring(0, intCharOffset));
                    }
                }
            }
        }

    /// <summary>
    /// Reads <paramref name="sD"/> from its beginning to its end and stores the bytes in
    /// <c>docContent</c>, followed by one extra zero byte.
    /// </summary>
    /// <param name="sD">
    /// Stream to read. Its position is reset to 0 first, so it must support seeking.
    /// The stream is closed when this method returns, because the reader wrapping it is disposed.
    /// </param>
    private void ReadDocument(Stream sD)
    {
        sD.Position = 0;

        byte[] docBuffer;
        using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
        using (System.IO.BinaryReader br = new System.IO.BinaryReader(sD))
        {
            // ReadBytes returns an empty array once the end of the stream is reached.
            byte[] buffer;
            while ((buffer = br.ReadBytes(8192)).Length > 0)
            {
                ms.Write(buffer, 0, buffer.Length);
            }

            // ToArray returns exactly the bytes written. GetBuffer would return the whole
            // internal buffer, including unused capacity, which would pad docContent with
            // zero bytes that were never part of the document.
            docBuffer = ms.ToArray();
        }

        // One element longer than the data; the extra trailing byte is left at zero.
        docContent = new byte[docBuffer.Length + 1];
        Array.Copy(docBuffer, docContent, docBuffer.Length);
    }
    /// <summary>
    /// Picks a length/offset pair from <paramref name="transparency"/>: from
    /// <c>Label.IEOffset</c> when a label is present, otherwise from <c>Value.ByteOffset</c>
    /// when a value is present and its byte offset is set and not -1.
    /// </summary>
    /// <param name="transparency">Item whose label or value offsets are inspected.</param>
    /// <returns><c>false</c> when the item has neither a label nor a value.</returns>
    /// <remarks>
    /// NOTE(review): only the final else branch returns. The other paths fall off the end of
    /// the method and the values gathered here are never used, so the remainder of the body
    /// appears to be missing from this listing.
    /// </remarks>
    private bool TestByteOffset(TransparencyItemType transparency)
    {
        System.Text.UTF8Encoding utf8 = null;
        string label = null;
        int length = 0;
        int start = 0;

        if (transparency.Label != null)
        {
            // A label is present: use its IE offset pair.
            length = Convert.ToInt32(transparency.Label.IEOffset.Length);
            start = Convert.ToInt32(transparency.Label.IEOffset.Offset);
        }
        else if (transparency.Value != null)
        {
            // No label: fall back to the value's byte offset, unless it is absent or -1.
            if (transparency.Value.ByteOffset != null
                && transparency.Value.ByteOffset.Offset != -1)
            {
                length = Convert.ToInt32(transparency.Value.ByteOffset.Length);
                start = Convert.ToInt32(transparency.Value.ByteOffset.Offset);
            }
        }
        else
        {
            // Neither a label nor a value.
            return false;
        }
    }