C# 如何以最快的方式计算文本文件中某个字符组合的出现次数？_C#_String_Character_Text Files

C# 如何以最快的方式计算文本文件中某个字符组合的出现次数？

c# string

C# 如何以最快的方式计算文本文件中某个字符组合的出现次数？,c#,string,character,text-files,C#,String,Character,Text Files,我想创建一个方法，计算.txt文件（C#）中一系列字符的出现次数。我在这里找到了一些相关的问题，这些问题都有正确的答案。但是，某些情况限制了可能的解决方案：这个方法必须运行得相当快，因为我必须在程序中使用它上百次文件中的文本过长，无法以字符串形式读取谢谢你的帮助这个方法必须运行得相当快，因为我必须在程序中使用它上百次根据，SequenceEqual of往往是当今.NET中比较数组切片的最快方法（不安全或p/Invoke方法除外）文件中的文本过长，无法以字符串形式读取使用或可以

我想创建一个方法，计算.txt文件（C#）中一系列字符的出现次数。我在这里找到了一些相关的问题，这些问题都有正确的答案。但是，某些情况限制了可能的解决方案：

这个方法必须运行得相当快，因为我必须在程序中使用它上百次
文件中的文本过长，无法以字符串形式读取

谢谢你的帮助

这个方法必须运行得相当快，因为我必须在程序中使用它上百次

据相关讨论，Span<T> 的 SequenceEqual 往往是当今 .NET 中比较数组切片的最快方法（不安全代码或 P/Invoke 方法除外）

文件中的文本过长，无法以字符串形式读取

使用 FileStream 或 StreamReader 可以轻松解决此问题

简而言之，您需要分块读取文件：从文件中读取固定大小的一块，查找其中的匹配项，再读取下一块、继续查找，依此类推。这样编码时无需向后移动光标：在处理下一块时，只需把上一块末尾的剩余部分一并考虑进去

以下是我使用FileStream和Span的方法：

当您不指定编码参数时，将通过检查文件开头的BOM（字节顺序标记）自动检测编码。如果BOM不存在，则回退为ASCII编码。

只需按程序能够处理的批次读取文件，然后逐批处理。边界情况：每一批都继续读取，直到遇到合适的词边界。谢谢。这是否意味着StreamReader中没有方法可以读取接下来的2/3/X个字符而不消耗它们？如果您只想计数，那么消耗它们有什么不对？您尝试过什么吗？我们绝不会替您完成您的工作，也就是思考、尝试、再思考。我浏览了StreamReader的所有方法，没有任何方法可以将光标移回。非常感谢，在仔细阅读并理解之后，我会再添加一条评论。

/// <summary>
/// Counts how many times <paramref name="searchString"/> occurs in <paramref name="stream"/>,
/// reading the stream in fixed-size chunks so the whole content is never held in memory.
/// </summary>
/// <remarks>
/// The search string is encoded to bytes and compared against the raw stream bytes at every
/// byte offset. Overlapping matches are counted (for example "aa" occurs three times in "aaaa").
/// NOTE(review): because the comparison is byte-wise, a fixed-width multi-byte encoding
/// (UTF-16/UTF-32) could match at an offset that is not a character boundary.
/// </remarks>
/// <param name="stream">A readable stream positioned at the start of the text.</param>
/// <param name="searchString">The non-empty text to look for.</param>
/// <param name="encoding">
/// Encoding used to turn <paramref name="searchString"/> into bytes. When null, it is detected
/// from a UTF-8, UTF-32 LE, UTF-16 LE or UTF-16 BE byte order mark, falling back to ASCII.
/// </param>
/// <param name="bufferSize">
/// Requested chunk size in bytes; raised to at least 128 and at least the encoded search length.
/// </param>
/// <returns>The number of (possibly overlapping) occurrences found.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> or <paramref name="searchString"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The stream is not readable, <paramref name="searchString"/> is empty,
/// or <paramref name="bufferSize"/> is not positive.
/// </exception>
public static int CountOccurences(Stream stream, string searchString, Encoding encoding = null, int bufferSize = 4096)
{
    if (stream == null)
        throw new ArgumentNullException(nameof(stream));

    if (searchString == null)
        throw new ArgumentNullException(nameof(searchString));

    // An empty pattern has no meaningful occurrence count and would break the chunk carry-over below.
    if (searchString.Length == 0)
        throw new ArgumentException("Search string must not be empty.", nameof(searchString));

    if (!stream.CanRead)
        throw new ArgumentException("Stream must be readable.", nameof(stream));

    if (bufferSize <= 0)
        throw new ArgumentException("Buffer size must be a positive number.", nameof(bufferSize));

    // Stream.Read may legally return fewer bytes than requested before the end of the stream,
    // so keep reading until the destination is full or the stream reports end (0 bytes).
    int Fill(Span<byte> destination)
    {
        var total = 0;
        while (total < destination.Length)
        {
            var read = stream.Read(destination.Slice(total));
            if (read == 0)
                break;

            total += read;
        }

        return total;
    }

    // detecting encoding
    Span<byte> bom = stackalloc byte[4];

    var actualLength = Fill(bom);
    if (actualLength == 0)
        return 0;

    bom = bom.Slice(0, actualLength);

    // UTF-32 LE must be tested before UTF-16 LE: its preamble starts with the UTF-16 LE one.
    Encoding detectedEncoding;
    if (bom.StartsWith(Encoding.UTF8.GetPreamble()))
        detectedEncoding = Encoding.UTF8;
    else if (bom.StartsWith(Encoding.UTF32.GetPreamble()))
        detectedEncoding = Encoding.UTF32;
    else if (bom.StartsWith(Encoding.Unicode.GetPreamble()))
        detectedEncoding = Encoding.Unicode;
    else if (bom.StartsWith(Encoding.BigEndianUnicode.GetPreamble()))
        detectedEncoding = Encoding.BigEndianUnicode;
    else
        detectedEncoding = null;

    if (detectedEncoding != null)
    {
        if (encoding == null)
            encoding = detectedEncoding;

        // Compare by code page rather than by reference so that an equivalent instance supplied
        // by the caller (e.g. new UTF8Encoding(false)) still gets the preamble stripped.
        if (encoding.CodePage == detectedEncoding.CodePage)
            bom = bom.Slice(detectedEncoding.GetPreamble().Length);
    }
    else if (encoding == null)
        encoding = Encoding.ASCII;

    // acquiring a buffer
    ReadOnlySpan<byte> searchBytes = encoding.GetBytes(searchString);

    bufferSize = Math.Max(Math.Max(bufferSize, searchBytes.Length), 128);

    var bufferArray = ArrayPool<byte>.Shared.Rent(bufferSize);
    try
    {
        // The pool may hand out a larger array; only the first bufferSize bytes are used.
        var buffer = new Span<byte>(bufferArray, 0, bufferSize);

        // looking for occurrences
        // Bytes read during BOM probing that are not part of the preamble belong to the text.
        bom.CopyTo(buffer);
        actualLength = bom.Length + Fill(buffer.Slice(bom.Length));
        var occurrences = 0;
        while (true)
        {
            var index = 0;
            var endIndex = actualLength - searchBytes.Length;
            for (; index <= endIndex; index++)
                if (buffer.Slice(index, searchBytes.Length).SequenceEqual(searchBytes))
                    occurrences++;

            // Fill only comes back short at end of stream, so a partially filled buffer is the last chunk.
            if (actualLength < buffer.Length)
                break;

            // Carry the unscanned tail (searchBytes.Length - 1 bytes) to the front so that
            // matches straddling two chunks are still found.
            ReadOnlySpan<byte> leftover = buffer.Slice(index);
            leftover.CopyTo(buffer);
            actualLength = leftover.Length + Fill(buffer.Slice(leftover.Length));
        }

        return occurrences;
    }
    finally { ArrayPool<byte>.Shared.Return(bufferArray); }
}

/// <summary>
/// Supplies a span-based <c>Read</c> extension for <see cref="Stream"/>.
/// </summary>
static class Compatibility
{
    /// <summary>
    /// Reads up to <c>buffer.Length</c> bytes from <paramref name="stream"/> into
    /// <paramref name="buffer"/> by going through a pooled temporary array.
    /// </summary>
    /// <param name="stream">The stream to read from.</param>
    /// <param name="buffer">Destination for the bytes that were read.</param>
    /// <returns>The number of bytes copied into <paramref name="buffer"/>.</returns>
    /// <exception cref="IOException">
    /// The stream reported a byte count outside the requested range.
    /// </exception>
    public static int Read(this Stream stream, Span<byte> buffer)
    {
        // Modeled on the corefx implementation (https://github.com/dotnet/corefx/blob/master/src/Common/src/CoreLib/System/IO/Stream.cs)
        int requested = buffer.Length;
        byte[] rented = ArrayPool<byte>.Shared.Rent(requested);
        try
        {
            int received = stream.Read(rented, 0, requested);

            // The unsigned cast makes a negative count fail this check as well.
            if ((uint)received > requested)
            {
                throw new IOException("Stream was too long.");
            }

            rented.AsSpan(0, received).CopyTo(buffer);
            return received;
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(rented);
        }
    }
}

// Example: open the file for shared read-only access and print the occurrence count.
using (var input = new FileStream(@"path-to-file", FileMode.Open, FileAccess.Read, FileShare.Read))
{
    var total = CountOccurences(input, "string to search");
    Console.WriteLine(total);
}