如何确定c#中字符串列表中的重复项?
我想知道如何确定下面的重复项,并用重复项填充新的字符串列表如何确定c#中字符串列表中的重复项?,c#,.net,list,linq,C#,.net,List,Linq,我想知道如何确定下面的重复项,并用重复项填充新的字符串列表 List<string> test = new List<string>(); List test=newlist(); 我一直在考虑这些问题,但得到一个编译错误,说我不能将-运算符应用于字符串列表 List<string> namesCollisions = test - test.Distinct().ToList(); List namesCollisions=test-test.Dist
List<string> test = new List<string>();
List test=newlist();
我一直在考虑这些问题,但得到一个编译错误,说我不能将-运算符应用于字符串列表
List<string> namesCollisions = test - test.Distinct().ToList();
List namesCollisions=test-test.Distinct().ToList();
因此,我尝试了以下建议,即使PROGRAM3列出了两次,列表还是空的
在linq或SQL中,必须使用
而不是负号,如下所示:
var namesCollisions = test.Except(test.Distinct().ToList());
但我不明白你们想要什么结果,这个答案总是返回空列表 无linq,纯代码
获取所有副本
public static IEnumerable<string> GetDuplicates(IEnumerable<string> original)
{
var hs = new HashSet<string>();
foreach (var item in original)
{
if (!hs.Add(item))
{
yield return item;
}
}
}
public static IEnumerable<string> GetDistinctDuplicates(IEnumerable<string> original)
{
var dict = new Dictionary<string, bool>();
foreach (var s in original)
{
// If found duplicate
if (dict.TryGetValue(s, out var isReturnedDupl))
{
// If already returned
if (isReturnedDupl)
{
continue;
}
dict[s] = true;
yield return s;
}
else
{
// First meet
dict.Add(s, false);
}
}
}
算法
- HashSet(GetDuplicates)
- Dict(getdistinctdeplicates)
- 分组方式(方法)
- GroupByWithSkip(方法)
是否使用默认配置进行基准测试
输入
- AllDupl-当集合仅包含相同字符串时的角位大小写
- NoDupl-集合仅包含uniq字符串时的角位大小写
Ini:
传说
结果
摘要
GroupBy、HashSet和Dict的性能严重依赖于输入。如果输入只包含重复项-字典更好。但当输入仅包含uniq值时,HashSet更好。对于随机输入(就像在通用代码中一样),GroupBy比HashSet工作得更好。还要注意的是,只有唯一值的输入-对于具有巨大性能命中率的GroupBy来说,这是最糟糕的情况
结果中的分配和GC列非常有趣
所以,更好的方法在很大程度上取决于输入中重复的数量、输入总数量和您的目标
来源
类程序
{
静态void Main(字符串[]参数)
{
var conf=ManualConfig.Create(DefaultConfig.Instance);
conf.Add(MemoryDiagnoser.Default);
var summary=BenchmarkRunner.Run(conf);
}
}
公共类重复FindBench
{
[基准]
[论证来源(数据名称))]
公共列表GroupBy(字符串[]输入,字符串描述)
{
返回input.GroupBy(x=>x)
.Where(g=>g.Count()>1)
.选择(y=>y.Key)
.ToList();
}
[基准]
[论证来源(数据名称))]
公共列表GroupByWithSkip(字符串[]输入,字符串描述)
{
返回input.GroupBy(x=>x)
.Where(g=>g.Skip(1).Any())
.选择(y=>y.Key)
.ToList();
}
[基准]
[论证来源(数据名称))]
公共列表哈希集(字符串[]输入,字符串描述)
{
返回GetDuplicates(input.Distinct().ToList();
}
[基准]
[论证来源(数据名称))]
公共列表目录(字符串[]输入,字符串描述)
{
返回GetDistinctDuplicates(input.ToList();
}
公共静态IEnumerable GetDuplicates(IEnumerable原件)
{
var hs=新的HashSet();
foreach(原始var项目)
{
如果(!hs.添加(项目))
{
收益回报项目;
}
}
}
公共静态IEnumerable GetDistinctDuplicates(IEnumerable原件)
{
var dict=新字典();
foreach(原始版本中的var s)
{
//如果发现重复的
if(dict.TryGetValue,out var isReturnedUpl))
{
//如果已经返回
如果(isReturnedUpl)
{
继续;
}
dict[s]=真;
收益率;
}
其他的
{
//初次见面
dict.Add(s,false);
}
}
}
公共IEnumerable数据()
{
常数int count1=1000;
返回新对象[]{ArrayParam.ForPrimitives(createNodeUpInput(count1.ToArray()),$“NoDupl,Count={count1}”};
返回新对象[]{ArrayParam.ForPrimitives(CreateAllDuplInput(count1).ToArray()),$“AllDupl,Count={count1}”;
常数int count2=10_000;
返回新对象[]{ArrayParam.ForPrimitives(createNodeUpInput(count2.ToArray()),$“NoDupl,Count={count2}”};
返回新对象[]{ArrayParam.ForPrimitives(CreateAllDuplInput(count2).ToArray()),$“AllDupl,Count={count2}”;
常数int count3=100_000;
返回新对象[]{ArrayParam.ForPrimitives(CreateNodeUpInput(count3.ToArray()),$“NoDupl,Count={count3}”};
返回新对象[]{ArrayParam.ForPrimitives(CreateAllDuplInput(count3).ToArray()),$“AllDupl,Count={count3}”;
}
公共列表CreateNodeUpInput(int inputSize)
{
var result=新列表();
for(int i=0;i
基准测试
- 我测试了4种算法
- 在两个
.Net Framework 4.7.1上的64位`
每个测试平均运行1000次
我缩放了每个数据集李>
在每次测试之前,框架执行GC.Collect
和GC.WaitForPendingFinalizers
和1个单独试验,以验证结果
我使用了4批输入数据
输入
public static List<string> ListofSimpleString(int scale)
{
return Enumerable.Range(0, scale)
.Select(x => Enumerable.Range(0,10).Select(y => (char)_rand.Next(255)).ToString())
.ToList();
}
public static List<string> ListofSimpleString2(int scale)
{
return Enumerable.Range(0, scale)
.Select(x => Enumerable.Range(0, 10).Select(y => (char)_rand.Next(48,95)).ToString())
.ToList();
}
public static List<string> ListofSimpleString4(int scale)
{
return Enumerable.Range(0, scale)
.Select(x => _rand.Next(10).ToString())
.ToList();
}
public static List<string> ListofSimpleString5(int scale)
{
return Enumerable.Range(0, scale)
.Select(x => "duplicate value")
.ToList();
}
测试1
--- 10 Random ascii chars -----------------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 ----------------------------------------------------------------- Time 0.099 ---
| Dictionary | 1.298 ms | 1.040 ms | 4,180,070 | 8.000 KB | N/A | 43.71 % |
| GroupByCount | 1.315 ms | 1.128 ms | 4,473,950 | 264.242 KB | N/A | 42.98 % |
| GroupBySkipAny | 1.387 ms | 1.097 ms | 4,622,163 | 264.242 KB | N/A | 39.88 % |
| HashSet | 2.306 ms | 2.010 ms | 7,842,618 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 ---------------------------------------------------------------- Time 0.656 ---
| Dictionary | 11.599 ms | 11.090 ms | 39,514,786 | 8.000 KB | N/A | 48.72 % |
| GroupByCount | 12.469 ms | 12.172 ms | 42,491,474 | 2.008 MB | N/A | 44.88 % |
| GroupBySkipAny | 12.834 ms | 12.490 ms | 43,703,545 | 2.008 MB | N/A | 43.26 % |
| HashSet | 22.620 ms | 22.171 ms | 77,091,768 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 -------------------------------------------------------------- Time 6.502 ---
| Dictionary | 116.635 ms | 114.583 ms | 397,153,297 | 8.000 KB | N/A | 50.92 % |
| GroupBySkipAny | 130.604 ms | 128.622 ms | 444,500,775 | 16.008 MB | N/A | 45.04 % |
| GroupByCount | 133.642 ms | 128.416 ms | 455,399,304 | 16.008 MB | N/A | 43.76 % |
| HashSet | 237.646 ms | 231.029 ms | 809,489,660 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ------------------------------------------------------------- Time 5.844 ---
| Dictionary | 1,169.275 ms | 1,163.115 ms | 3,984,243,078 | 8.000 KB | N/A | 50.76 % |
| GroupBySkipAny | 1,353.768 ms | 1,345.292 ms | 4,608,323,709 | 256.009 MB | N/A | 43.00 % |
| GroupByCount | 1,358.509 ms | 1,344.632 ms | 4,627,402,507 | 256.009 MB | N/A | 42.80 % |
| HashSet | 2,374.831 ms | 2,334.440 ms | 8,089,227,303 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 ------------------------------------------------------------ Time 8.679 ---
| Dictionary | 11,751.858 ms | 11,656.590 ms | 40,027,059,699 | 8.000 KB | N/A | 52.24 % |
| GroupBySkipAny | 13,585.036 ms | 13,346.376 ms | 46,230,547,515 | 2.000 GB | N/A | 44.79 % |
| GroupByCount | 13,891.448 ms | 13,664.273 ms | 47,215,273,015 | 2.000 GB | N/A | 43.54 % |
| HashSet | 24,605.782 ms | 24,440.468 ms | 83,658,042,598 | 8.000 KB | Base | 0.00 % |
-------------------------------------------------------------------------------------------------
测试2
--- 10 Random printable ascii chars -------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 ----------------------------------------------------------------- Time 0.259 ---
| Dictionary | 1.087 ms | 1.052 ms | 3,654,669 | 8.000 KB | N/A | 50.38 % |
| GroupByCount | 1.182 ms | 1.133 ms | 4,020,552 | 264.211 KB | N/A | 46.07 % |
| GroupBySkipAny | 1.300 ms | 1.163 ms | 4,415,126 | 264.211 KB | N/A | 40.67 % |
| HashSet | 2.191 ms | 2.024 ms | 7,462,586 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 ---------------------------------------------------------------- Time 0.758 ---
| Dictionary | 12.033 ms | 11.333 ms | 40,839,598 | 8.000 KB | N/A | 48.66 % |
| GroupBySkipAny | 12.821 ms | 12.616 ms | 43,599,424 | 2.008 MB | N/A | 45.30 % |
| GroupByCount | 12.903 ms | 12.643 ms | 43,879,671 | 2.008 MB | N/A | 44.95 % |
| HashSet | 23.438 ms | 22.309 ms | 79,514,592 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 -------------------------------------------------------------- Time 6.687 ---
| Dictionary | 119.383 ms | 116.696 ms | 406,335,788 | 8.000 KB | N/A | 51.02 % |
| GroupBySkipAny | 134.819 ms | 131.747 ms | 458,589,071 | 16.008 MB | N/A | 44.68 % |
| GroupByCount | 139.834 ms | 132.961 ms | 476,092,342 | 16.008 MB | N/A | 42.63 % |
| HashSet | 243.722 ms | 239.580 ms | 829,409,546 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ------------------------------------------------------------- Time 8.579 ---
| Dictionary | 1,237.373 ms | 1,213.387 ms | 4,203,352,967 | 8.000 KB | N/A | 49.48 % |
| GroupByCount | 1,404.209 ms | 1,385.300 ms | 4,778,762,566 | 256.009 MB | N/A | 42.67 % |
| GroupBySkipAny | 1,423.058 ms | 1,384.701 ms | 4,838,714,809 | 256.009 MB | N/A | 41.90 % |
| HashSet | 2,449.190 ms | 2,381.713 ms | 8,334,623,472 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 ----------------------------------------------------------- Time 59.573 ---
| Dictionary | 12,126.807 ms | 11,692.415 ms | 41,233,771,464 | 8.000 KB | N/A | 49.00 % |
| GroupByCount | 13,289.256 ms | 13,062.683 ms | 45,292,203,200 | 2.000 GB | N/A | 44.12 % |
| GroupBySkipAny | 13,760.635 ms | 13,261.366 ms | 46,825,002,767 | 2.000 GB | N/A | 42.13 % |
| HashSet | 23,780.270 ms | 22,785.622 ms | 80,971,187,805 | 8.000 KB | Base | 0.00 % |
-------------------------------------------------------------------------------------------------
测试3
--- Random number between 0 and 10 ------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 --------------------------------------------------------------- Time 0.224 ---
| Dictionary | 0.397 ms | 0.387 ms | 1,349,447 | 8.000 KB | N/A | 50.59 % |
| GroupBySkipAny | 0.495 ms | 0.490 ms | 1,683,949 | 200.563 KB | N/A | 38.38 % |
| GroupByCount | 0.506 ms | 0.503 ms | 1,722,584 | 200.563 KB | N/A | 36.97 % |
| HashSet | 0.803 ms | 0.786 ms | 2,734,083 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 -------------------------------------------------------------- Time 0.459 ---
| Dictionary | 3.929 ms | 3.767 ms | 13,387,884 | 8.000 KB | N/A | 48.65 % |
| GroupBySkipAny | 5.195 ms | 4.873 ms | 17,699,816 | 2.510 MB | N/A | 32.09 % |
| GroupByCount | 5.233 ms | 4.904 ms | 17,825,215 | 2.510 MB | N/A | 31.60 % |
| HashSet | 7.650 ms | 7.444 ms | 26,081,151 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 ------------------------------------------------------------ Time 3.416 ---
| Dictionary | 39.934 ms | 38.031 ms | 136,107,565 | 8.000 KB | N/A | 47.61 % |
| GroupBySkipAny | 52.159 ms | 50.011 ms | 177,797,622 | 20.011 MB | N/A | 31.57 % |
| GroupByCount | 54.562 ms | 49.745 ms | 185,883,905 | 20.011 MB | N/A | 28.42 % |
| HashSet | 76.221 ms | 73.899 ms | 259,702,109 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ---------------------------------------------------------- Time 33.643 ---
| Dictionary | 396.948 ms | 381.873 ms | 1,352,809,995 | 7.035 KB | N/A | 47.82 % |
| GroupByCount | 519.931 ms | 515.210 ms | 1,771,927,979 | 160.012 MB | N/A | 31.66 % |
| GroupBySkipAny | 537.953 ms | 516.127 ms | 1,833,424,578 | 160.013 MB | N/A | 29.29 % |
| HashSet | 760.781 ms | 751.582 ms | 2,592,592,185 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 --------------------------------------------------------- Time 43.701 ---
| Dictionary | 3,945.544 ms | 3,845.146 ms | 13,442,361,467 | 8.000 KB | N/A | 49.08 % |
| GroupByCount | 5,666.408 ms | 5,501.203 ms | 19,301,260,141 | 2.500 GB | N/A | 26.87 % |
| GroupBySkipAny | 5,688.156 ms | 5,536.729 ms | 19,370,101,611 | 2.500 GB | N/A | 26.59 % |
| HashSet | 7,748.656 ms | 7,605.495 ms | 26,399,373,179 | 7.315 KB | Base | 0.00 % |
-----------------------------------------------------------------------------------------------
测试4
--- All Duplicates ----------------------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 --------------------------------------------------------------- Time 0.957 ---
| Dictionary | 0.444 ms | 0.425 ms | 1,508,780 | 8.000 KB | N/A | 50.55 % |
| GroupBySkipAny | 0.561 ms | 0.543 ms | 1,912,069 | 264.211 KB | N/A | 37.44 % |
| GroupByCount | 0.566 ms | 0.544 ms | 1,927,506 | 264.211 KB | N/A | 36.93 % |
| HashSet | 0.897 ms | 0.876 ms | 3,058,602 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 -------------------------------------------------------------- Time 0.341 ---
| Dictionary | 4.342 ms | 4.195 ms | 14,764,603 | 8.000 KB | N/A | 56.79 % |
| GroupBySkipAny | 6.090 ms | 5.437 ms | 20,636,444 | 2.008 MB | N/A | 39.38 % |
| GroupByCount | 6.327 ms | 5.812 ms | 21,478,886 | 2.008 MB | N/A | 37.03 % |
| HashSet | 10.047 ms | 8.627 ms | 34,243,915 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 ------------------------------------------------------------ Time 2.962 ---
| Dictionary | 45.242 ms | 42.814 ms | 154,054,415 | 8.000 KB | N/A | 54.02 % |
| GroupBySkipAny | 58.574 ms | 53.289 ms | 199,411,629 | 16.008 MB | N/A | 40.47 % |
| GroupByCount | 63.450 ms | 54.705 ms | 215,792,787 | 16.008 MB | N/A | 35.52 % |
| HashSet | 98.396 ms | 85.450 ms | 335,093,581 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ---------------------------------------------------------- Time 28.309 ---
| Dictionary | 432.955 ms | 424.321 ms | 1,474,860,339 | 8.000 KB | N/A | 49.97 % |
| GroupByCount | 600.265 ms | 581.515 ms | 2,044,282,844 | 256.009 MB | N/A | 30.64 % |
| GroupBySkipAny | 603.112 ms | 581.099 ms | 2,054,976,446 | 256.009 MB | N/A | 30.31 % |
| HashSet | 865.449 ms | 854.386 ms | 2,949,024,388 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 --------------------------------------------------------- Time 38.508 ---
| Dictionary | 4,394.937 ms | 4,261.656 ms | 14,973,292,181 | 8.000 KB | N/A | 50.11 % |
| GroupBySkipAny | 5,799.055 ms | 5,718.249 ms | 19,758,314,574 | 2.000 GB | N/A | 34.16 % |
| GroupByCount | 5,909.234 ms | 5,781.676 ms | 20,126,526,198 | 2.000 GB | N/A | 32.91 % |
| HashSet | 8,808.441 ms | 8,617.298 ms | 30,010,947,763 | 8.000 KB | Base | 0.00 % |
-----------------------------------------------------------------------------------------------
算法是
字典
public class Dictionary : Benchmark<List<string>, List<string>>
{
public static IEnumerable<string> GetDistinctDuplicates(IEnumerable<string> original)
{
var dict = new Dictionary<string, bool>();
foreach (var s in original)
if (dict.TryGetValue(s, out var isReturnedDupl))
{
if (isReturnedDupl)
continue;
dict[s] = true;
yield return s;
}
else
dict.Add(s, false);
}
protected override List<string> InternalRun()
{
return GetDistinctDuplicates(Input).ToList();
}
}
public class GroupBy : Benchmark<List<string>, List<string>>
{
protected override List<string> InternalRun()
{
return Input.GroupBy(x => x)
.Where(g => g.Count() > 1)
.Select(y => y.Key)
.ToList();
}
}
公共类字典:基准
{
公共静态IEnumerable GetDistinctDuplicates(IEnumerable原件)
{
var dict=新Di
--- 10 Random ascii chars -----------------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 ----------------------------------------------------------------- Time 0.099 ---
| Dictionary | 1.298 ms | 1.040 ms | 4,180,070 | 8.000 KB | N/A | 43.71 % |
| GroupByCount | 1.315 ms | 1.128 ms | 4,473,950 | 264.242 KB | N/A | 42.98 % |
| GroupBySkipAny | 1.387 ms | 1.097 ms | 4,622,163 | 264.242 KB | N/A | 39.88 % |
| HashSet | 2.306 ms | 2.010 ms | 7,842,618 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 ---------------------------------------------------------------- Time 0.656 ---
| Dictionary | 11.599 ms | 11.090 ms | 39,514,786 | 8.000 KB | N/A | 48.72 % |
| GroupByCount | 12.469 ms | 12.172 ms | 42,491,474 | 2.008 MB | N/A | 44.88 % |
| GroupBySkipAny | 12.834 ms | 12.490 ms | 43,703,545 | 2.008 MB | N/A | 43.26 % |
| HashSet | 22.620 ms | 22.171 ms | 77,091,768 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 -------------------------------------------------------------- Time 6.502 ---
| Dictionary | 116.635 ms | 114.583 ms | 397,153,297 | 8.000 KB | N/A | 50.92 % |
| GroupBySkipAny | 130.604 ms | 128.622 ms | 444,500,775 | 16.008 MB | N/A | 45.04 % |
| GroupByCount | 133.642 ms | 128.416 ms | 455,399,304 | 16.008 MB | N/A | 43.76 % |
| HashSet | 237.646 ms | 231.029 ms | 809,489,660 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ------------------------------------------------------------- Time 5.844 ---
| Dictionary | 1,169.275 ms | 1,163.115 ms | 3,984,243,078 | 8.000 KB | N/A | 50.76 % |
| GroupBySkipAny | 1,353.768 ms | 1,345.292 ms | 4,608,323,709 | 256.009 MB | N/A | 43.00 % |
| GroupByCount | 1,358.509 ms | 1,344.632 ms | 4,627,402,507 | 256.009 MB | N/A | 42.80 % |
| HashSet | 2,374.831 ms | 2,334.440 ms | 8,089,227,303 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 ------------------------------------------------------------ Time 8.679 ---
| Dictionary | 11,751.858 ms | 11,656.590 ms | 40,027,059,699 | 8.000 KB | N/A | 52.24 % |
| GroupBySkipAny | 13,585.036 ms | 13,346.376 ms | 46,230,547,515 | 2.000 GB | N/A | 44.79 % |
| GroupByCount | 13,891.448 ms | 13,664.273 ms | 47,215,273,015 | 2.000 GB | N/A | 43.54 % |
| HashSet | 24,605.782 ms | 24,440.468 ms | 83,658,042,598 | 8.000 KB | Base | 0.00 % |
-------------------------------------------------------------------------------------------------
--- 10 Random printable ascii chars -------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 ----------------------------------------------------------------- Time 0.259 ---
| Dictionary | 1.087 ms | 1.052 ms | 3,654,669 | 8.000 KB | N/A | 50.38 % |
| GroupByCount | 1.182 ms | 1.133 ms | 4,020,552 | 264.211 KB | N/A | 46.07 % |
| GroupBySkipAny | 1.300 ms | 1.163 ms | 4,415,126 | 264.211 KB | N/A | 40.67 % |
| HashSet | 2.191 ms | 2.024 ms | 7,462,586 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 ---------------------------------------------------------------- Time 0.758 ---
| Dictionary | 12.033 ms | 11.333 ms | 40,839,598 | 8.000 KB | N/A | 48.66 % |
| GroupBySkipAny | 12.821 ms | 12.616 ms | 43,599,424 | 2.008 MB | N/A | 45.30 % |
| GroupByCount | 12.903 ms | 12.643 ms | 43,879,671 | 2.008 MB | N/A | 44.95 % |
| HashSet | 23.438 ms | 22.309 ms | 79,514,592 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 -------------------------------------------------------------- Time 6.687 ---
| Dictionary | 119.383 ms | 116.696 ms | 406,335,788 | 8.000 KB | N/A | 51.02 % |
| GroupBySkipAny | 134.819 ms | 131.747 ms | 458,589,071 | 16.008 MB | N/A | 44.68 % |
| GroupByCount | 139.834 ms | 132.961 ms | 476,092,342 | 16.008 MB | N/A | 42.63 % |
| HashSet | 243.722 ms | 239.580 ms | 829,409,546 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ------------------------------------------------------------- Time 8.579 ---
| Dictionary | 1,237.373 ms | 1,213.387 ms | 4,203,352,967 | 8.000 KB | N/A | 49.48 % |
| GroupByCount | 1,404.209 ms | 1,385.300 ms | 4,778,762,566 | 256.009 MB | N/A | 42.67 % |
| GroupBySkipAny | 1,423.058 ms | 1,384.701 ms | 4,838,714,809 | 256.009 MB | N/A | 41.90 % |
| HashSet | 2,449.190 ms | 2,381.713 ms | 8,334,623,472 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 ----------------------------------------------------------- Time 59.573 ---
| Dictionary | 12,126.807 ms | 11,692.415 ms | 41,233,771,464 | 8.000 KB | N/A | 49.00 % |
| GroupByCount | 13,289.256 ms | 13,062.683 ms | 45,292,203,200 | 2.000 GB | N/A | 44.12 % |
| GroupBySkipAny | 13,760.635 ms | 13,261.366 ms | 46,825,002,767 | 2.000 GB | N/A | 42.13 % |
| HashSet | 23,780.270 ms | 22,785.622 ms | 80,971,187,805 | 8.000 KB | Base | 0.00 % |
-------------------------------------------------------------------------------------------------
--- Random number between 0 and 10 ------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 --------------------------------------------------------------- Time 0.224 ---
| Dictionary | 0.397 ms | 0.387 ms | 1,349,447 | 8.000 KB | N/A | 50.59 % |
| GroupBySkipAny | 0.495 ms | 0.490 ms | 1,683,949 | 200.563 KB | N/A | 38.38 % |
| GroupByCount | 0.506 ms | 0.503 ms | 1,722,584 | 200.563 KB | N/A | 36.97 % |
| HashSet | 0.803 ms | 0.786 ms | 2,734,083 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 -------------------------------------------------------------- Time 0.459 ---
| Dictionary | 3.929 ms | 3.767 ms | 13,387,884 | 8.000 KB | N/A | 48.65 % |
| GroupBySkipAny | 5.195 ms | 4.873 ms | 17,699,816 | 2.510 MB | N/A | 32.09 % |
| GroupByCount | 5.233 ms | 4.904 ms | 17,825,215 | 2.510 MB | N/A | 31.60 % |
| HashSet | 7.650 ms | 7.444 ms | 26,081,151 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 ------------------------------------------------------------ Time 3.416 ---
| Dictionary | 39.934 ms | 38.031 ms | 136,107,565 | 8.000 KB | N/A | 47.61 % |
| GroupBySkipAny | 52.159 ms | 50.011 ms | 177,797,622 | 20.011 MB | N/A | 31.57 % |
| GroupByCount | 54.562 ms | 49.745 ms | 185,883,905 | 20.011 MB | N/A | 28.42 % |
| HashSet | 76.221 ms | 73.899 ms | 259,702,109 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ---------------------------------------------------------- Time 33.643 ---
| Dictionary | 396.948 ms | 381.873 ms | 1,352,809,995 | 7.035 KB | N/A | 47.82 % |
| GroupByCount | 519.931 ms | 515.210 ms | 1,771,927,979 | 160.012 MB | N/A | 31.66 % |
| GroupBySkipAny | 537.953 ms | 516.127 ms | 1,833,424,578 | 160.013 MB | N/A | 29.29 % |
| HashSet | 760.781 ms | 751.582 ms | 2,592,592,185 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 --------------------------------------------------------- Time 43.701 ---
| Dictionary | 3,945.544 ms | 3,845.146 ms | 13,442,361,467 | 8.000 KB | N/A | 49.08 % |
| GroupByCount | 5,666.408 ms | 5,501.203 ms | 19,301,260,141 | 2.500 GB | N/A | 26.87 % |
| GroupBySkipAny | 5,688.156 ms | 5,536.729 ms | 19,370,101,611 | 2.500 GB | N/A | 26.59 % |
| HashSet | 7,748.656 ms | 7,605.495 ms | 26,399,373,179 | 7.315 KB | Base | 0.00 % |
-----------------------------------------------------------------------------------------------
--- All Duplicates ----------------------------------------------------------------------------
| Value | Average | Fastest | Cycles | Garbage | Test | Gain |
--- Scale 10,000 --------------------------------------------------------------- Time 0.957 ---
| Dictionary | 0.444 ms | 0.425 ms | 1,508,780 | 8.000 KB | N/A | 50.55 % |
| GroupBySkipAny | 0.561 ms | 0.543 ms | 1,912,069 | 264.211 KB | N/A | 37.44 % |
| GroupByCount | 0.566 ms | 0.544 ms | 1,927,506 | 264.211 KB | N/A | 36.93 % |
| HashSet | 0.897 ms | 0.876 ms | 3,058,602 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000 -------------------------------------------------------------- Time 0.341 ---
| Dictionary | 4.342 ms | 4.195 ms | 14,764,603 | 8.000 KB | N/A | 56.79 % |
| GroupBySkipAny | 6.090 ms | 5.437 ms | 20,636,444 | 2.008 MB | N/A | 39.38 % |
| GroupByCount | 6.327 ms | 5.812 ms | 21,478,886 | 2.008 MB | N/A | 37.03 % |
| HashSet | 10.047 ms | 8.627 ms | 34,243,915 | 8.000 KB | Base | 0.00 % |
--- Scale 1,000,000 ------------------------------------------------------------ Time 2.962 ---
| Dictionary | 45.242 ms | 42.814 ms | 154,054,415 | 8.000 KB | N/A | 54.02 % |
| GroupBySkipAny | 58.574 ms | 53.289 ms | 199,411,629 | 16.008 MB | N/A | 40.47 % |
| GroupByCount | 63.450 ms | 54.705 ms | 215,792,787 | 16.008 MB | N/A | 35.52 % |
| HashSet | 98.396 ms | 85.450 ms | 335,093,581 | 8.000 KB | Base | 0.00 % |
--- Scale 10,000,000 ---------------------------------------------------------- Time 28.309 ---
| Dictionary | 432.955 ms | 424.321 ms | 1,474,860,339 | 8.000 KB | N/A | 49.97 % |
| GroupByCount | 600.265 ms | 581.515 ms | 2,044,282,844 | 256.009 MB | N/A | 30.64 % |
| GroupBySkipAny | 603.112 ms | 581.099 ms | 2,054,976,446 | 256.009 MB | N/A | 30.31 % |
| HashSet | 865.449 ms | 854.386 ms | 2,949,024,388 | 8.000 KB | Base | 0.00 % |
--- Scale 100,000,000 --------------------------------------------------------- Time 38.508 ---
| Dictionary | 4,394.937 ms | 4,261.656 ms | 14,973,292,181 | 8.000 KB | N/A | 50.11 % |
| GroupBySkipAny | 5,799.055 ms | 5,718.249 ms | 19,758,314,574 | 2.000 GB | N/A | 34.16 % |
| GroupByCount | 5,909.234 ms | 5,781.676 ms | 20,126,526,198 | 2.000 GB | N/A | 32.91 % |
| HashSet | 8,808.441 ms | 8,617.298 ms | 30,010,947,763 | 8.000 KB | Base | 0.00 % |
-----------------------------------------------------------------------------------------------
public class Dictionary : Benchmark<List<string>, List<string>>
{
public static IEnumerable<string> GetDistinctDuplicates(IEnumerable<string> original)
{
var dict = new Dictionary<string, bool>();
foreach (var s in original)
if (dict.TryGetValue(s, out var isReturnedDupl))
{
if (isReturnedDupl)
continue;
dict[s] = true;
yield return s;
}
else
dict.Add(s, false);
}
protected override List<string> InternalRun()
{
return GetDistinctDuplicates(Input).ToList();
}
}
public class GroupBy : Benchmark<List<string>, List<string>>
{
protected override List<string> InternalRun()
{
return Input.GroupBy(x => x)
.Where(g => g.Count() > 1)
.Select(y => y.Key)
.ToList();
}
}
public class GroupBy2 : Benchmark<List<string>, List<string>>
{
protected override List<string> InternalRun()
{
return Input.GroupBy(x => x)
.Where(g => g.Count() > 1)
.Select(y => y.Key)
.ToList();
}
}