使用C#多次比较两个巨大的列表(带一点变化)
大家好,你们这里的社区很棒。我是一名电气工程师,兼职做一些“编程”工作来帮助支付账单。我这样说是因为我想让你们考虑到我没有受过适当的计算机科学培训,但我在过去的7年里一直在编码 我有几个excel表格,里面有信息(都是数字),基本上是一列中的“已拨电话号码”,另一列中的每一个号码对应的分钟数。另外,我有一份针对我国不同运营商的“运营商前缀代码”列表。我想做的是将每个运营商的所有“流量”分开。以下是场景: 第一个拨号号码行:123456789ABCD,100也许在数据库中而不是在C#中这样做更简单(不一定更有效) 您可以在数据库中插入行,并在插入时确定载体并将其包含在记录中(可能在插入触发器中)多次使用C#比较两个巨大的列表(有一点扭曲),c#,list,compare,matching,C#,List,Compare,Matching,大家好,你们这里的社区很棒。我是一名电气工程师,兼职做一些“编程”工作来帮助支付账单。我这样说是因为我想让你们考虑到我没有受过适当的计算机科学培训,但我在过去的7年里一直在编码 我有几个excel表格,里面有信息(都是数字),基本上是一列中的“已拨电话号码”,另一列中的每一个号码对应的分钟数。另外,我有一份针对我国不同运营商的“运营商前缀代码”列表。我想做的是将每个运营商的所有“流量”分开。以下是场景: 第一个拨号号码行:123456789ABCD,100也许在数据库中而不是在C#中这样做更简单
然后,您的报告只需要对该表做一次求和查询即可。
我觉得您需要根据运营商前缀构建一棵trie(前缀树)。您将得到一棵trie,其中的终止节点会告诉您该前缀所属的运营商。然后创建一个从运营商(carrier)到 int 或 long(总分钟数)的字典。
然后,对于每一个已拨号码行,只需沿着trie往下走,直到找到运营商;取出该运营商到目前为止的总分钟数,加上当前行的分钟数,然后继续处理下一行。
最简单的数据结构是一个由集合组成的列表:为每个运营商建立一个包含其所有前缀的集合。现在,要将呼叫与运营商关联:
// Assign the call to the first carrier that owns one of its leading digit groups.
foreach (Carrier carrier in carriers)
{
    // Probe leading digit groups of length 1 through 7, shortest first,
    // and stop at the first one this carrier lists as a prefix.
    int length = 1;
    while (length <= 7 && !carrier.Prefixes.Contains(ExtractDigits(callNumber, length)))
    {
        length++;
    }

    // length <= 7 means the scan stopped on a hit rather than running out of lengths.
    if (length <= 7)
    {
        carrier.Calls.Add(callNumber);
        break;
    }
}
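foreach (Carrier carrier in carriers)
{
bool found = false;
for (int length = 1; length <= 7; length++) // ……(此处是上面代码片段的重复,原文在“<”处被截断)
我可能只是将条目放入一个列表中,对其进行排序,然后使用二分查找来查找匹配项。定制二分查找的匹配条件,使其返回第一个匹配项,然后沿列表迭代,直到遇到一个不匹配的条目为止。二分查找只需大约15次比较即可搜索包含30000个条目的列表。
您可能想在C#中使用键值对集合(例如 Dictionary)。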
这样,您就有了键-值对:键可以是电话号码,值可以是总分钟数。如果在键集合中找到匹配项,就修改其总分钟数;否则,添加一个新键。
然后,您只需修改搜索算法,使其不比较整个键,而只比较键的前7位。
更新
你可以做一个简单的技巧——将前缀按首位数分组到一个字典中,并只根据正确的子集匹配数字
// Number of leading characters used as the bucket key. Substring(0, n) throws on
// shorter strings, so every prefix and number must be at least this long.
const Int32 minimumPrefixLength = 3;

// Bucket the prefixes by their leading characters so each number is compared
// against one small bucket instead of the full prefix list.
var groupedPefixes = prefixes
    .GroupBy(prefix => prefix.Substring(0, minimumPrefixLength))
    .ToDictionary(bucket => bucket.Key, bucket => bucket);

// For each number, look up its bucket and take the first prefix it starts with.
// The indexer and First() both throw when a number has no matching prefix.
var numberPrefixes = numbers
    .Select(number =>
    {
        var candidates = groupedPefixes[number.Substring(0, minimumPrefixLength)];
        return candidates.First(candidate => number.StartsWith(candidate));
    })
    .ToList();
那么这有多快呢?15000个前缀和50000个号码用了不到250毫秒。对于两行代码来说,这已经足够快了吧?
请注意,性能在很大程度上取决于最小前缀长度(MPL),因此取决于可以构造的前缀组的数量
MPL Runtime
-----------------
1 10198 ms
2 1179 ms
3 205 ms
4 130 ms
5 107 ms
嗯,在我的2.0 GHz Core 2 Duo笔记本电脑上花了大约一分钟。因此,如果一分钟的处理时间是可以接受的(如果包括聚合,可能是两到三分钟),我就不会尝试优化任何东西。当然,如果程序可以在一两秒钟内完成任务,那会非常好,但这会增加相当多的复杂性,而且设计、编写和测试都需要时间。而这条LINQ语句只花了我几秒钟的时间。
测试应用程序
请注意,生成许多前缀非常慢,可能需要一两分钟
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
namespace Test
{
    /// <summary>
    /// Benchmark harness: generates random prefix-free prefixes and random calls that
    /// start with those prefixes, then times how long it takes to sum the call
    /// durations per prefix using a dictionary of prefix buckets.
    /// </summary>
    static class Program
    {
        // One generator shared by both helpers. Separate `new Random()` instances created
        // back to back can receive the same time-based seed on .NET Framework and would
        // then produce identical sequences.
        private static readonly Random SharedRandom = new Random();

        /// <summary>
        /// Generates the test data, runs and times the aggregation, and prints the
        /// details when both data sets contain at most 50 entries.
        /// </summary>
        static void Main()
        {
            // Bucket key length. It is also passed as the minimum generated prefix length,
            // because Substring(0, minimumPrefixLength) requires every prefix to be at
            // least this long.
            const Int32 minimumPrefixLength = 5;

            // Set number of prefixes and calls to not more than 50 to get results
            // printed to the console.
            Console.Write("Generating prefixes");
            List<String> prefixes = Program.GeneratePrefixes(minimumPrefixLength, 10, 15);
            Console.WriteLine();

            Console.Write("Generating calls");
            List<Call> calls = Program.GenerateCalls(prefixes, 5, 10, 50);
            Console.WriteLine();

            Console.WriteLine("Processing started.");
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            // Bucket the prefixes by their leading characters so each call is compared
            // against one small bucket instead of the whole prefix list.
            var groupedPefixes = prefixes
                .GroupBy(p => p.Substring(0, minimumPrefixLength))
                .ToDictionary(g => g.Key, g => g);

            // Group the calls by their matching prefix and sum the durations per group.
            // Every generated call starts with a known prefix, so the bucket lookup and
            // First() always succeed here; a number without a matching prefix would throw.
            var result = calls
                .GroupBy(c => groupedPefixes[c.Number.Substring(0, minimumPrefixLength)]
                    .First(p => c.Number.StartsWith(p, StringComparison.Ordinal)))
                .Select(g => new Call(g.Key, g.Sum(i => i.Duration)))
                .ToList();

            stopwatch.Stop();
            Console.WriteLine("Processing finished.");
            Console.WriteLine(stopwatch.Elapsed);

            if ((prefixes.Count <= 50) && (calls.Count <= 50))
            {
                Console.WriteLine("Prefixes");
                foreach (String prefix in prefixes.OrderBy(p => p))
                {
                    Console.WriteLine(" prefix={0}", prefix);
                }

                Console.WriteLine("Calls");
                foreach (Call call in calls.OrderBy(c => c.Number).ThenBy(c => c.Duration))
                {
                    Console.WriteLine(" number={0} duration={1}", call.Number, call.Duration);
                }

                // In the result list, Number holds the prefix and Duration the accumulated total.
                Console.WriteLine("Result");
                foreach (Call call in result.OrderBy(c => c.Number))
                {
                    Console.WriteLine(" prefix={0} accumulated duration={1}", call.Number, call.Duration);
                }
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Generates <paramref name="count"/> random digit strings such that no entry is a
        /// prefix of another entry.
        /// </summary>
        /// <param name="minimumLength">Smallest allowed prefix length (inclusive).</param>
        /// <param name="maximumLength">Largest allowed prefix length (inclusive).</param>
        /// <param name="count">Number of prefixes to generate.</param>
        /// <returns>The generated prefixes in generation order.</returns>
        private static List<String> GeneratePrefixes(Int32 minimumLength, Int32 maximumLength, Int32 count)
        {
            List<String> prefixes = new List<String>(count);
            StringBuilder stringBuilder = new StringBuilder(maximumLength);

            while (prefixes.Count < count)
            {
                // Draw the length once. Re-rolling it in the loop condition on every
                // iteration would bias the result towards short strings.
                Int32 length = SharedRandom.Next(minimumLength, maximumLength + 1);

                stringBuilder.Length = 0;
                for (int i = 0; i < length; i++)
                {
                    stringBuilder.Append(SharedRandom.Next(10));
                }

                String prefix = stringBuilder.ToString();

                // Reject candidates that would make the set ambiguous: one that extends an
                // existing prefix or is itself the start of one. Ordinal comparison is
                // sufficient for digit strings and avoids culture-sensitive matching.
                bool conflicts = prefixes.Any(p =>
                    p.StartsWith(prefix, StringComparison.Ordinal) ||
                    prefix.StartsWith(p, StringComparison.Ordinal));
                if (conflicts)
                {
                    continue;
                }

                // Progress marker, emitted once per 1000 accepted prefixes. Checking after
                // the rejection test keeps rejected candidates from printing extra dots.
                if (prefixes.Count % 1000 == 0)
                {
                    Console.Write(".");
                }

                prefixes.Add(prefix);
            }

            return prefixes;
        }

        /// <summary>
        /// Generates <paramref name="count"/> calls. Each number is a randomly chosen
        /// prefix followed by random digits, with a random duration below 1000.
        /// </summary>
        /// <param name="prefixes">Prefixes to pick from.</param>
        /// <param name="minimumLength">Smallest number of digits appended after the prefix (inclusive).</param>
        /// <param name="maximumLength">Largest number of digits appended after the prefix (inclusive).</param>
        /// <param name="count">Number of calls to generate.</param>
        /// <returns>The generated calls in generation order.</returns>
        private static List<Call> GenerateCalls(List<String> prefixes, Int32 minimumLength, Int32 maximumLength, Int32 count)
        {
            List<Call> calls = new List<Call>(count);
            StringBuilder stringBuilder = new StringBuilder();

            while (calls.Count < count)
            {
                stringBuilder.Length = 0;
                stringBuilder.Append(prefixes[SharedRandom.Next(prefixes.Count)]);

                // Draw the suffix length once so it is uniform over the requested range.
                Int32 suffixLength = SharedRandom.Next(minimumLength, maximumLength + 1);
                for (int i = 0; i < suffixLength; i++)
                {
                    stringBuilder.Append(SharedRandom.Next(10));
                }

                // Progress marker, once per 1000 generated calls.
                if (calls.Count % 1000 == 0)
                {
                    Console.Write(".");
                }

                calls.Add(new Call(stringBuilder.ToString(), SharedRandom.Next(1000)));
            }

            return calls;
        }

        /// <summary>
        /// A dialled number with its duration. Also reused for the aggregated result,
        /// where Number holds the prefix and Duration the summed duration.
        /// </summary>
        private class Call
        {
            public Call(String number, Decimal duration)
            {
                this.Number = number;
                this.Duration = duration;
            }

            public String Number { get; private set; }

            public Decimal Duration { get; private set; }
        }
    }
}
使用系统;
使用System.Collections.Generic;
使用系统诊断;
使用System.Linq;
使用系统文本;
名称空间测试
{
静态类程序
{
静态void Main()
{
//将前缀和调用数设置为不超过50以获得结果
//打印到控制台。
Console.Write(“生成前缀”);
列表前缀=Program.GeneratePrefixes(5,10,15);
Console.WriteLine();
Console.Write(“生成调用”);
列表调用=Program.GenerateCalls(前缀5、10、50);
Console.WriteLine();
WriteLine(“处理已启动”);
秒表秒表=新秒表();
常数Int32最小umpRefiXlength=5;
秒表。开始();
var groupedPefixes=前缀
.GroupBy(p=>p.Substring(0,最小umprefixlength))
.ToDictionary(g=>g.Key,g=>g);
var result=调用
.GroupBy(c=>groupedPefixes[c.Number.Substring(0,minimumPrefixLength)]
.第一个(c.编号.起始带))
.Select(g=>newcall(g.Key,g.Sum(i=>i.Duration)))
.ToList();
秒表;
Console.WriteLine(“处理完成”);
控制台写入线(秒表已过);
if((前缀.Count c.Number).ThenBy(c=>c.Duration))
{
WriteLine(String.Format(“number={0}duration={1}”,call.number,call.duration));
}
控制台。写入线(“结果”);
foreach(调用result.OrderBy(c=>c.Number))
{
WriteLine(String.Format(“前缀={0}累计持续时间={1}”,call.Number,call.duration));
}
}
Console.ReadLine();
}
专用静态列表生成器引用(Int32 minimumLength、Int32 maximumLength、Int32 count)
{
随机=新随机();
列表前缀=新列表(计数);
StringBuilder StringBuilder=新StringBuilder(最大长度);
while (prefixes.Count < count) // ……(以上是机器翻译后的重复代码,原文在“<”处被截断)
下面是最简单的、不做任何分组的LINQ写法:
var numberPrefixes = numbers.Select(n => prefixes.First(n.StartsWith)).ToList();
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
namespace Test
{
    /// <summary>
    /// Benchmark harness: generates random prefix-free prefixes and random calls that
    /// start with those prefixes, then times how long it takes to sum the call
    /// durations per prefix using a dictionary of prefix buckets.
    /// </summary>
    static class Program
    {
        // One generator shared by both helpers. Separate `new Random()` instances created
        // back to back can receive the same time-based seed on .NET Framework and would
        // then produce identical sequences.
        private static readonly Random SharedRandom = new Random();

        /// <summary>
        /// Generates the test data, runs and times the aggregation, and prints the
        /// details when both data sets contain at most 50 entries.
        /// </summary>
        static void Main()
        {
            // Bucket key length. It is also passed as the minimum generated prefix length,
            // because Substring(0, minimumPrefixLength) requires every prefix to be at
            // least this long.
            const Int32 minimumPrefixLength = 5;

            // Set number of prefixes and calls to not more than 50 to get results
            // printed to the console.
            Console.Write("Generating prefixes");
            List<String> prefixes = Program.GeneratePrefixes(minimumPrefixLength, 10, 15);
            Console.WriteLine();

            Console.Write("Generating calls");
            List<Call> calls = Program.GenerateCalls(prefixes, 5, 10, 50);
            Console.WriteLine();

            Console.WriteLine("Processing started.");
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            // Bucket the prefixes by their leading characters so each call is compared
            // against one small bucket instead of the whole prefix list.
            var groupedPefixes = prefixes
                .GroupBy(p => p.Substring(0, minimumPrefixLength))
                .ToDictionary(g => g.Key, g => g);

            // Group the calls by their matching prefix and sum the durations per group.
            // Every generated call starts with a known prefix, so the bucket lookup and
            // First() always succeed here; a number without a matching prefix would throw.
            var result = calls
                .GroupBy(c => groupedPefixes[c.Number.Substring(0, minimumPrefixLength)]
                    .First(p => c.Number.StartsWith(p, StringComparison.Ordinal)))
                .Select(g => new Call(g.Key, g.Sum(i => i.Duration)))
                .ToList();

            stopwatch.Stop();
            Console.WriteLine("Processing finished.");
            Console.WriteLine(stopwatch.Elapsed);

            if ((prefixes.Count <= 50) && (calls.Count <= 50))
            {
                Console.WriteLine("Prefixes");
                foreach (String prefix in prefixes.OrderBy(p => p))
                {
                    Console.WriteLine(" prefix={0}", prefix);
                }

                Console.WriteLine("Calls");
                foreach (Call call in calls.OrderBy(c => c.Number).ThenBy(c => c.Duration))
                {
                    Console.WriteLine(" number={0} duration={1}", call.Number, call.Duration);
                }

                // In the result list, Number holds the prefix and Duration the accumulated total.
                Console.WriteLine("Result");
                foreach (Call call in result.OrderBy(c => c.Number))
                {
                    Console.WriteLine(" prefix={0} accumulated duration={1}", call.Number, call.Duration);
                }
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Generates <paramref name="count"/> random digit strings such that no entry is a
        /// prefix of another entry.
        /// </summary>
        /// <param name="minimumLength">Smallest allowed prefix length (inclusive).</param>
        /// <param name="maximumLength">Largest allowed prefix length (inclusive).</param>
        /// <param name="count">Number of prefixes to generate.</param>
        /// <returns>The generated prefixes in generation order.</returns>
        private static List<String> GeneratePrefixes(Int32 minimumLength, Int32 maximumLength, Int32 count)
        {
            List<String> prefixes = new List<String>(count);
            StringBuilder stringBuilder = new StringBuilder(maximumLength);

            while (prefixes.Count < count)
            {
                // Draw the length once. Re-rolling it in the loop condition on every
                // iteration would bias the result towards short strings.
                Int32 length = SharedRandom.Next(minimumLength, maximumLength + 1);

                stringBuilder.Length = 0;
                for (int i = 0; i < length; i++)
                {
                    stringBuilder.Append(SharedRandom.Next(10));
                }

                String prefix = stringBuilder.ToString();

                // Reject candidates that would make the set ambiguous: one that extends an
                // existing prefix or is itself the start of one. Ordinal comparison is
                // sufficient for digit strings and avoids culture-sensitive matching.
                bool conflicts = prefixes.Any(p =>
                    p.StartsWith(prefix, StringComparison.Ordinal) ||
                    prefix.StartsWith(p, StringComparison.Ordinal));
                if (conflicts)
                {
                    continue;
                }

                // Progress marker, emitted once per 1000 accepted prefixes. Checking after
                // the rejection test keeps rejected candidates from printing extra dots.
                if (prefixes.Count % 1000 == 0)
                {
                    Console.Write(".");
                }

                prefixes.Add(prefix);
            }

            return prefixes;
        }

        /// <summary>
        /// Generates <paramref name="count"/> calls. Each number is a randomly chosen
        /// prefix followed by random digits, with a random duration below 1000.
        /// </summary>
        /// <param name="prefixes">Prefixes to pick from.</param>
        /// <param name="minimumLength">Smallest number of digits appended after the prefix (inclusive).</param>
        /// <param name="maximumLength">Largest number of digits appended after the prefix (inclusive).</param>
        /// <param name="count">Number of calls to generate.</param>
        /// <returns>The generated calls in generation order.</returns>
        private static List<Call> GenerateCalls(List<String> prefixes, Int32 minimumLength, Int32 maximumLength, Int32 count)
        {
            List<Call> calls = new List<Call>(count);
            StringBuilder stringBuilder = new StringBuilder();

            while (calls.Count < count)
            {
                stringBuilder.Length = 0;
                stringBuilder.Append(prefixes[SharedRandom.Next(prefixes.Count)]);

                // Draw the suffix length once so it is uniform over the requested range.
                Int32 suffixLength = SharedRandom.Next(minimumLength, maximumLength + 1);
                for (int i = 0; i < suffixLength; i++)
                {
                    stringBuilder.Append(SharedRandom.Next(10));
                }

                // Progress marker, once per 1000 generated calls.
                if (calls.Count % 1000 == 0)
                {
                    Console.Write(".");
                }

                calls.Add(new Call(stringBuilder.ToString(), SharedRandom.Next(1000)));
            }

            return calls;
        }

        /// <summary>
        /// A dialled number with its duration. Also reused for the aggregated result,
        /// where Number holds the prefix and Duration the summed duration.
        /// </summary>
        private class Call
        {
            public Call(String number, Decimal duration)
            {
                this.Number = number;
                this.Duration = duration;
            }

            public String Number { get; private set; }

            public Decimal Duration { get; private set; }
        }
    }
}
-- One row per dialled number with the minutes used on that call.
CREATE TABLE dbo.dialled_numbers (
    number  VARCHAR(100),
    minutes INT
);

-- One row per carrier prefix.
CREATE TABLE dbo.prefixes (
    prefix VARCHAR(100)
);

-- now populate the tables, create indexes etc
-- and then just run your query...

-- Total minutes per prefix: a dialled number counts towards every prefix it starts with.
SELECT
    p.prefix,
    SUM(n.minutes) AS total_minutes
FROM dbo.prefixes AS p
INNER JOIN dbo.dialled_numbers AS n
    ON n.number LIKE p.prefix + '%'
GROUP BY
    p.prefix;