C# 统计一个字节列表/数组在另一个字节列表/数组中出现的次数
我想统计一个字节序列在另一个字节序列中出现的总次数。但是,已经计入某次匹配的字节不能重复使用。例如,给定字符串…… C# 统计一个字节列表/数组在另一个字节列表/数组中出现的次数,c#,.net,vb.net,compression,C#,.net,Vb.net,Compression。我想统计一个字节序列在另一个字节序列中出现的总次数。但是,已经计入某次匹配的字节不能重复使用。例如,给定字符串 k.k.k.k.k.,假设要查找的字节序列是 k.k,那么只会找到 3 次而不是 5 次,因为匹配会被划分为:[k.k].[k.k].,而不是 [k.[k].[k].[k].[k].k] 那样相互重叠、每次基本上只向右移动 2 个位置的划分方式。这样做的初衷是想了解压缩字典或游程编码(run-length encoding)大致是什么样子。因此目标是把 k.k.k.k.k. 压缩到只剩两个部分,因为 (k.k.k.) 是所能得到的最大、最优的符号。以下是目前为止的源代码:
k.k.k.k.k.
假设字节序列是k.k
那么只会找到 3 次而不是 5 次,因为匹配会被划分为:[k.k].[k.k].
而不是 [k.[k].[k].[k].[k].k]
那样相互重叠、每次基本上只向右移动 2 个位置的划分方式
这样做的初衷是想了解压缩字典或游程编码(run-length encoding)大致是什么样子。因此目标是把
k.k.k.k.k.
压缩到只剩两个部分,因为 (k.k.k.) 是所能得到的最大、最优的符号
以下是目前为止的源代码:
using System;
using System.Collections.Generic;
using System.Collections;
using System.Linq;
using System.Text;
using System.IO;
/// <summary>
/// Experimental console tool that scores candidate byte patterns ("symbols") found in a file
/// by counting their non-overlapping occurrences, as a first step towards a compression dictionary.
/// </summary>
static class Compression
{
    /// <summary>
    /// Reads "ok.txt" and, for every pattern length from the file size down to 1 and every start
    /// offset, counts the occurrences of that pattern in the buffer. Every occurrence after the
    /// first is removed from the buffer, so later patterns are taken from the shrunken data.
    /// One row of statistics per pattern is collected and printed at the end.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <returns>Always 0.</returns>
    static int Main(string[] args)
    {
        List<byte> bytes = File.ReadAllBytes("ok.txt").ToList();
        List<List<int>> results = new List<List<int>>();

        // Starting number of bytes - this can be changed manually.
        int startingNumBytes = bytes.Count;

        for (int length = startingNumBytes; length > 0; length--)
        {
            Console.WriteLine("i: " + length);

            // bytes.Count is re-read on every pass because removals below shrink the buffer.
            // NOTE(review): the strict '<' means the pattern that ends exactly at the last byte is
            // never tried (and nothing is tried at full length) - confirm whether that is intended.
            for (int start = 0; start < bytes.Count - length; start++)
            {
                // Report the inner start offset (the original printed the outer length here).
                Console.WriteLine("ii: " + start);

                // Independent copy of the candidate pattern, unaffected by later removals.
                List<byte> pattern = bytes.GetRange(start, length);

                DisplayBinary(bytes, "red");
                DisplayBinary(pattern, "green");

                int matches = 0;
                for (int position = 0; position < bytes.Count; position++)
                {
                    if (!MatchesAt(bytes, position, pattern))
                    {
                        continue;
                    }

                    matches++;
                    Console.WriteLine("Matches: " + matches + " Orig Count: " + bytes.Count + " POS: " + position);

                    // The first occurrence is kept; each repeat is cut out of the buffer.
                    if (matches > 1)
                    {
                        bytes.RemoveRange(position, pattern.Count);
                        DisplayBinary(bytes, "red");
                        Console.WriteLine(pattern.Count + " Bytes removed.");

                        // The data shifted left into this slot, so re-examine the same position.
                        position--;
                    }
                }

                // Row layout: match count, pattern length, and a rough score of how good the symbol was.
                results.Add(new List<int>
                {
                    matches,
                    pattern.Count,
                    bytes.Count - ((matches * pattern.Count) - matches)
                });
            }
        }

        Display(results);
        Console.Read();
        return 0;
    }

    /// <summary>
    /// Tells whether <paramref name="pattern"/> occurs in <paramref name="data"/> starting at
    /// <paramref name="position"/>. Returns false when the pattern would run past the end of the data.
    /// </summary>
    static bool MatchesAt(List<byte> data, int position, List<byte> pattern)
    {
        if (pattern.Count > data.Count - position)
        {
            return false;
        }

        for (int offset = 0; offset < pattern.Count; offset++)
        {
            if (data[position + offset] != pattern[offset])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Writes every byte as an 8-digit binary string, eight bytes per console line,
    /// using the requested foreground colour.
    /// </summary>
    /// <param name="bytes">Bytes to print.</param>
    /// <param name="color">"green" or "red"; any other value leaves the current colour unchanged.</param>
    static void DisplayBinary(List<byte> bytes, string color="white")
    {
        switch (color)
        {
            case "green":
                Console.ForegroundColor = ConsoleColor.Green;
                break;
            case "red":
                Console.ForegroundColor = ConsoleColor.Red;
                break;
            default:
                break;
        }

        for (int index = 0; index < bytes.Count; index++)
        {
            // Start a new line before every group of eight bytes (including the first).
            if (index % 8 == 0)
            {
                Console.WriteLine();
            }

            Console.Write(GetIntBinaryString(bytes[index]) + " ");
        }

        Console.WriteLine();
        Console.ResetColor();
    }

    /// <summary>
    /// Formats the low eight bits of <paramref name="n"/> as a zero-padded binary string,
    /// most significant bit first.
    /// </summary>
    static string GetIntBinaryString(int n)
    {
        // Masking keeps only the low byte, so the result never exceeds eight digits.
        return Convert.ToString(n & 0xFF, 2).PadLeft(8, '0');
    }

    /// <summary>
    /// Prints each row of <paramref name="list"/> on its own line (values right-aligned in
    /// four-character columns), followed by the total number of values across all rows.
    /// </summary>
    static void Display(List<List<int>> list)
    {
        Console.WriteLine("Elements:");
        foreach (List<int> row in list)
        {
            foreach (int value in row)
            {
                Console.Write("{0,4}", value);
            }

            Console.WriteLine();
        }

        // Total count: the number of values stored, not their sum.
        int count = 0;
        foreach (List<int> row in list)
        {
            count += row.Count;
        }

        Console.WriteLine("Count:");
        Console.WriteLine(count);
    }

    /// <summary>
    /// Counts the non-overlapping occurrences of <paramref name="pattern"/> in
    /// <paramref name="bytes"/>, scanning left to right.
    /// </summary>
    /// <param name="pattern">Byte sequence to look for.</param>
    /// <param name="bytes">Data to search.</param>
    /// <returns>
    /// The number of matches; 0 when the pattern is empty or longer than the data.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static int SearchBytePattern(byte[] pattern, byte[] bytes)
    {
        if (pattern == null)
        {
            throw new ArgumentNullException("pattern");
        }

        if (bytes == null)
        {
            throw new ArgumentNullException("bytes");
        }

        // An empty pattern has no meaningful occurrence count (and pattern[0] would be out of range).
        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;

        // Last index at which a full pattern still fits; inclusive, so a match that ends
        // exactly on the final byte is counted too.
        int lastStart = bytes.Length - pattern.Length;

        for (int i = 0; i <= lastStart; i++)
        {
            if (bytes[i] != pattern[0])
            {
                continue;
            }

            bool isMatch = true;
            for (int j = 1; j < pattern.Length; j++)
            {
                if (bytes[i + j] != pattern[j])
                {
                    isMatch = false;
                    break;
                }
            }

            if (isMatch)
            {
                matches++;

                // Skip past the matched bytes so they are not reused (the loop adds the final +1).
                i += pattern.Length - 1;
            }
        }

        return matches;
    }
}
使用系统;
使用System.Collections.Generic;
使用系统集合;
使用System.Linq;
使用系统文本;
使用System.IO;
静态类压缩
{
静态int Main(字符串[]args)
{
List bytes=File.ReadAllBytes(“ok.txt”).ToList();
列表=新列表();
//起始字节数-可以手动更改。
int StartingNumBytes=字节数。计数;
对于(int i=StartingNumBytes;i>0;i--)
{
控制台写入线(“i:+i”);
for(int ii=0;ii(bytes.Count-位置))
{
继续;
}
对于(int-iiii=0;iiii1)
{
int numBytesToRemove=pattern.Count;
对于(int-ra=0;ra 对于(int i=0;i
一个简单粗糙、不使用正则表达式的实现。虽然我不确定它是否符合提问的本意,但它应该相对较快。我打算再和正则表达式的方案做一些计时对比,以确定两者的相对速度:
/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="TestPattern"/> in
/// <paramref name="TestString"/>, scanning left to right.
/// </summary>
/// <param name="TestString">Text to search; an empty string yields 0.</param>
/// <param name="TestPattern">Text to look for; must not be empty.</param>
/// <returns>The number of non-overlapping matches.</returns>
/// <exception cref="ApplicationException"><paramref name="TestPattern"/> has zero length.</exception>
private int CountOccurrences(string TestString, string TestPattern)
{
    if (TestPattern.Length == 0)
    {
        throw new ApplicationException("CountOccurrences: Unable to process because TestPattern has zero length.");
    }

    if (TestString.Length == 0)
    {
        return 0;
    }

    int patternCount = 0;
    int searchIndex = 0;

    while (searchIndex < TestString.Length)
    {
        // Ordinal comparison: match exact characters rather than applying the current
        // culture's linguistic rules, which can skip or merge characters.
        searchIndex = TestString.IndexOf(TestPattern, searchIndex, StringComparison.Ordinal);
        if (searchIndex < 0)
        {
            break;
        }

        ++patternCount;

        // Resume after the match so its characters are not reused.
        searchIndex += TestPattern.Length;
    }

    return patternCount;
}
// Click handler used as a quick manual check: prints three non-overlapping match counts
// for the sample text to the console.
private void btnTest_Click(object sender, EventArgs e)
{
    const string sampleText = "k.k.k.k.k.k.k.k.k.k.k.k";
    const string samplePattern = "k.k";

    // Plain text and plain pattern.
    int plainCount = CountOccurrences(sampleText, samplePattern);
    System.Console.WriteLine(plainCount.ToString()); // outputs 6

    // Appending ".k" to the text adds no further complete match.
    int longerTextCount = CountOccurrences(sampleText + ".k", samplePattern);
    System.Console.WriteLine(longerTextCount.ToString()); // still 6

    // Extending the pattern with a trailing dot loses the final match.
    int longerPatternCount = CountOccurrences(sampleText, samplePattern + ".");
    System.Console.WriteLine(longerPatternCount.ToString()); // only 5
}
private int countoccurrencess(string TestString,string TestPattern)
{
int PatternCount=0;
int SearchIndex=0;
if(TestPattern.Length==0)
抛出新的ApplicationException(“CountOccurrencess:无法处理,因为TestPattern的长度为零。”);
if(TestString.Length==0)
返回0;
做
{
SearchIndex=TestString.IndexOf(TestPattern,SearchIndex);
如果(搜索索引>=0)
{
++模式计数;
SearchIndex+=TestPattern.Length;
}
}
/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="TestPattern"/> in
/// <paramref name="TestString"/>, scanning left to right.
/// </summary>
/// <param name="TestString">Text to search; an empty string yields 0.</param>
/// <param name="TestPattern">Text to look for; must not be empty.</param>
/// <returns>The number of non-overlapping matches.</returns>
/// <exception cref="ApplicationException"><paramref name="TestPattern"/> has zero length.</exception>
private int CountOccurrences(string TestString, string TestPattern)
{
    if (TestPattern.Length == 0)
    {
        throw new ApplicationException("CountOccurrences: Unable to process because TestPattern has zero length.");
    }

    if (TestString.Length == 0)
    {
        return 0;
    }

    int patternCount = 0;
    int searchIndex = 0;

    while (searchIndex < TestString.Length)
    {
        // Ordinal comparison: match exact characters rather than applying the current
        // culture's linguistic rules, which can skip or merge characters.
        searchIndex = TestString.IndexOf(TestPattern, searchIndex, StringComparison.Ordinal);
        if (searchIndex < 0)
        {
            break;
        }

        ++patternCount;

        // Resume after the match so its characters are not reused.
        searchIndex += TestPattern.Length;
    }

    return patternCount;
}
// Click handler used as a quick manual check: prints three non-overlapping match counts
// for the sample text to the console.
private void btnTest_Click(object sender, EventArgs e)
{
    const string sampleText = "k.k.k.k.k.k.k.k.k.k.k.k";
    const string samplePattern = "k.k";

    // Plain text and plain pattern.
    int plainCount = CountOccurrences(sampleText, samplePattern);
    System.Console.WriteLine(plainCount.ToString()); // outputs 6

    // Appending ".k" to the text adds no further complete match.
    int longerTextCount = CountOccurrences(sampleText + ".k", samplePattern);
    System.Console.WriteLine(longerTextCount.ToString()); // still 6

    // Extending the pattern with a trailing dot loses the final match.
    int longerPatternCount = CountOccurrences(sampleText, samplePattern + ".");
    System.Console.WriteLine(longerPatternCount.ToString()); // only 5
}
/// <summary>
/// Lazily yields every index in <paramref name="sequence"/> at which <paramref name="pattern"/>
/// occurs, comparing elements with the default equality comparer for <typeparamref name="T"/>.
/// </summary>
/// <param name="pattern">Elements to look for.</param>
/// <param name="sequence">Elements to search.</param>
/// <param name="overlap">
/// When true, the search resumes one element after each match, so matches may overlap;
/// when false, it resumes after the end of the match.
/// </param>
/// <returns>
/// The start indices of the matches in ascending order. An empty pattern matches at every
/// index from 0 to the sequence length, regardless of <paramref name="overlap"/>.
/// </returns>
public static IEnumerable<int> Find<T>(T[] pattern, T[] sequence, bool overlap)
{
    EqualityComparer<T> comparer = EqualityComparer<T>.Default;

    // An empty pattern would otherwise advance by 0 and yield the same index forever.
    int step = (overlap || pattern.Length == 0) ? 1 : pattern.Length;

    // Last index at which a full pattern still fits.
    int lastStart = sequence.Length - pattern.Length;

    int i = 0;
    while (i <= lastStart)
    {
        // Compare in place rather than re-walking the sequence from its start for each position.
        bool isMatch = true;
        for (int j = 0; j < pattern.Length; j++)
        {
            if (!comparer.Equals(pattern[j], sequence[i + j]))
            {
                isMatch = false;
                break;
            }
        }

        if (isMatch)
        {
            yield return i;
            i += step;
        }
        else
        {
            i++;
        }
    }
}