C# 如何最有效地（快速）匹配两个列表？_C#_.net_Performance_Optimization_Search

C# 如何最有效地（快速）匹配两个列表？
c# .net performance optimization search
C# 如何最有效地（快速）匹配两个列表？,c#,.net,performance,optimization,search,C#,.net,Performance,Optimization,Search,我有两个项目、来源和目标的列表。源列表中的项在目标列表中将有0到n个匹配项，但不会有重复的匹配项考虑到两个列表都已排序，您将如何在性能方面最有效地进行匹配例如： source = {"1", "2", "A", "B", ...} target = {"1 - new music", "1 / classic", "1 | pop", "2 edit", "2 no edit", "A - sing", "B (listen)", ...} 基本上，匹配是简单的前缀匹配，但是假设您有一个名
我有两个项目列表：源（source）列表和目标（target）列表。源列表中的每一项在目标列表中会有 0 到 n 个匹配项，但不会出现重复的匹配项。
考虑到两个列表都已排序，如何才能以最高的性能完成匹配？
例如：
source = {"1", "2", "A", "B", ...}
target = {"1 - new music", "1 / classic", "1 | pop", "2 edit", "2 no edit", "A - sing", "B (listen)", ...}

基本上，匹配就是简单的前缀匹配，不过假设您已有一个名为 MatchName 的方法。如果要做更优化的搜索，也可以换用新的函数。
NameMatch 只比较两个字符串并返回布尔值。
最终，在本例中 source[0] 的匹配结果（source[0].Matches）将包含 target[0]、target[1] 和 target[2]。我认为最好的方法是预先建立一个索引，像这样（Javascript）
在这种情况下，其实并不需要列表是排好序的。（已编辑、重写，未经测试）性能应为 O(source + target)。
用法可以是 MatchMaker.Match(source, target).ToList()
我反对过早优化。
我不确定这是否值得去优化。您可以在此基础上实现某种二分查找，但效果相当有限。我们讨论的是多少个元素？
目标中没有不匹配的元素
假设列表已排序，且 target 中不存在无法与 source 匹配的元素：
/// <summary>
/// Groups the entries of <paramref name="target"/> under the entry of
/// <paramref name="source"/> that <c>MatchName</c> accepts for them.
/// </summary>
/// <remarks>
/// Both arrays are walked once, front to back, and the source index only ever moves
/// forward, so the grouping is only meaningful when both inputs are ordered
/// consistently. A target that matches none of the remaining sources exhausts the
/// source array and ends the scan; any targets after it are left unassigned.
/// </remarks>
/// <param name="source">Keys to match against.</param>
/// <param name="target">Candidates to distribute over the keys.</param>
/// <returns>
/// An array parallel to <paramref name="source"/>; element <c>i</c> lists the targets
/// matched to <c>source[i]</c> in their original order. Every element is a non-null
/// list, and the array is empty when <paramref name="source"/> is empty.
/// </returns>
static List<string>[] FindMatches(string[] source, string[] target)
{
    // One (initially empty) result list per source entry.
    List<string>[] matches = new List<string>[source.Length];
    for (int i = 0; i < matches.Length; i++)
    {
        matches[i] = new List<string>();
    }

    int s = 0;
    foreach (string candidate in target)
    {
        // Bounds are checked before source[s] is read, so an empty source array
        // (or running off the end) stops the scan instead of throwing.
        while (s < source.Length && !MatchName(source[s], candidate))
        {
            s++;
        }

        if (s >= source.Length)
        {
            // No source left that could accept this or any later target.
            break;
        }

        matches[s].Add(candidate);
    }

    return matches;
}

这两者在其他方面基本相同。如您所见，只需遍历目标元素一次；当找不到更多匹配项时，就把源数组的索引向前推进。
如果源元素的数量有限，那么进行一次更智能的搜索可能是值得的。如果源元素的数量也很大，那么假定的好处就会减少
在我的机器上，以调试模式运行、目标元素为 100 万个时，第一个算法需要 0.18 秒。第二个更快（0.03 秒），但这是因为它所做的比较更简单。实际中您很可能需要比较第一个空格字符之前的全部内容，这会使速度大大降低。
很明显，一旦超过当前源前缀，您就会停止在目标列表上循环。在这种情况下，您最好使用前缀方法，而不是匹配的方法，这样您就可以知道当前前缀是什么，并在经过它时停止搜索目标。
在对项目进行排序时，您可以在列表中循环：
// Sample input: keys and the candidate strings to distribute over them.
string[] source = { "1", "2", "A", "B" };
string[] target = { "1 - new music", "1 / classic", "1 | pop", "2 edit", "2 no edit", "A - sing", "B (listen)" };

// matches[i] receives the consecutive targets that NameMatch accepts for source[i].
List<string>[] matches = new List<string>[source.Length];

// Cursor into target; it is shared by all sources and never moves backwards.
int targetIdx = 0;
for (int sourceIdx = 0; sourceIdx < source.Length; sourceIdx++) {
   List<string> bucket = new List<string>();
   matches[sourceIdx] = bucket;

   // Consume targets until the first one the current source does not accept;
   // that target stays at the cursor for the next source to try.
   for (; targetIdx < target.Length; targetIdx++) {
      if (!NameMatch(source[sourceIdx], target[targetIdx])) {
         break;
      }
      bucket.Add(target[targetIdx]);
   }
}

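string[] source = {"1", "2", "A", "B" };
string[] target = { "1 - new music", "1 / classic", "1 | pop", "2 edit", "2 no edit", "A - sing", "B (listen)" };
List<string>[] matches = new List<string>[source.Length];
int targetIdx = 0;
for (int sourceIdx = 0; sourceIdx < source.Length; sourceIdx++) {
   matches[sourceIdx] = new List<string>();
   while (targetIdx < target.Length && NameMatch(source[sourceIdx], target[targetIdx])) {
      matches[sourceIdx].Add(target[targetIdx]);
      targetIdx++;
   }
}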
这里有一个答案，它对两个列表都只循环一次，并利用两个列表都已排序这一点作为优化。正如大多数人所说，我不会太担心优化，因为这些答案中的任何一个都可能足够快；我会选择可读性和可维护性最好的解决方案。
话虽如此，我喝咖啡时正好想找点事做，所以就写了这个给你。下面这段代码的优点之一是，它允许目标列表中存在与源列表中任何内容都不匹配的项，尽管我不确定您是否需要该功能。
/// <summary>
/// Console demo: walks a list of sources and a list of targets once each and attaches
/// every target to the source whose key is a case-insensitive prefix of it. Targets that
/// fit no source are skipped. The single forward pass relies on both lists being in
/// ascending key order.
/// </summary>
class Program
{
    /// <summary>A source key together with the targets that were matched to it.</summary>
    public class Source
    {
        private readonly string key;

        /// <summary>Prefix that a target must start with to belong to this source.</summary>
        public string Key { get { return key; } }

        private readonly List<string> matches = new List<string>();

        /// <summary>Targets attached to this source so far, in the order they were seen.</summary>
        public List<string> Matches { get { return matches; } }

        public Source(string key)
        {
            this.key = key;
        }
    }

    static void Main(string[] args)
    {
        var sources = new List<Source> {new Source("A"), new Source("C"), new Source("D")};
        var targets = new List<string> { "A1", "A2", "B1", "C1", "C2", "C3", "D1", "D2", "D3", "E1" };

        // ixSource always points at the source AFTER currentSource.
        var ixSource = 0;
        var currentSource = sources[ixSource++];

        foreach (var target in targets)
        {
            var compare = CompareSourceAndTarget(currentSource, target);

            // The target sorts after the current key: step through the sources until one
            // matches, one sorts after the target, or the sources run out.
            while ((compare < 0) && (ixSource < sources.Count))
            {
                currentSource = sources[ixSource++];
                compare = CompareSourceAndTarget(currentSource, target);
            }

            if (compare == 0)
            {
                currentSource.Matches.Add(target);
            }
            else if (compare < 0)
            {
                // Every source has been tried and this target still sorts after the last
                // key, so (with sorted input) no later target can match either.
                break;
            }

            // compare > 0: the target sorts before the current key and fits no source; skip it.
        }

        foreach (var source in sources)
        {
            Console.WriteLine("source {0} had matches {1}", source.Key, String.Join(" ", source.Matches.ToArray()));
        }
    }

    /// <summary>
    /// Compares a source key with the leading characters of a target, ignoring case.
    /// </summary>
    /// <returns>
    /// Zero when <paramref name="target"/> starts with the key, a negative value when the
    /// key sorts before the target's prefix, a positive value when it sorts after it.
    /// </returns>
    private static int CompareSourceAndTarget(Source source, string target)
    {
        // The length-bounded overload compares at most Key.Length characters and copes with
        // targets shorter than the key, where Substring would throw.
        return String.Compare(source.Key, 0, target, 0, source.Key.Length, StringComparison.OrdinalIgnoreCase);
    }
}

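class Program
{
    public class Source
    {
        private readonly string key;
        public string Key { get { return key;}}

        private readonly List<string> matches = new List<string>();
        public List<string> Matches { get { return matches;} }

        public Source(string key)
        {
            this.key = key;
        }
    }

    static void Main(string[] args)
    {
        var sources = new List<Source> {new Source("A"), new Source("C"), new Source("D")};
        var targets = new List<string> { "A1", "A2", "B1", "C1", "C2", "C3", "D1", "D2", "D3", "E1" };

        var ixSource = 0;
        var currentSource = sources[ixSource++];

        foreach (var target in targets)
        {
            var compare = CompareSourceAndTarget(currentSource, target);

            if (compare > 0)
                continue;

            // Try and increment the source till we have one that matches 
            if (compare < 0)
            {
                while ((ixSource < sources.Count) && (compare < 0))
                {
                    currentSource = sources[ixSource++];
                    compare = CompareSourceAndTarget(currentSource, target);
                }
            }

            if (compare == 0)
            {
                currentSource.Matches.Add(target);
            }

            // no more sources to match against
            if ((ixSource > sources.Count))
                break;
        }

        foreach (var source in sources)
        {
            Console.WriteLine("source {0} had matches {1}", source.Key, String.Join(" ", source.Matches.ToArray()));
        }
    }

    private static int CompareSourceAndTarget(Source source, string target)
    {
        return String.Compare(source.Key, target.Substring(0, source.Key.Length), StringComparison.OrdinalIgnoreCase);
    }
}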
既然它们是排序的，那么它不就是一个基本的O（N）合并循环吗
ia = ib = 0;
// Single pass over both sorted sequences; each of the three outcomes is handled in
// exactly one place. The loop runs while either sequence still has elements.
while(ia < na || ib < nb){
  if (ib >= nb || (ia < na && A[ia] < B[ib])){
    // A[ia] is unmatched (B is exhausted, or B's current element is already larger)
    ia++;
  }
  else if (ia >= na || B[ib] < A[ia]){
    // B[ib] is unmatched (A is exhausted, or A's current element is already larger)
    ib++;
  }
  else {
    // A[ia] matches B[ib]
    ia++;
    ib++;
  }
}

ia = ib = 0;
while (ia < na && ib < nb) { … }
这与您的另一个问题本质上不是一样的吗？我想在这个问题中，您可以利用元素已排序这一点做更多优化。它们相似，但……
/// <summary>
/// Groups the entries of <paramref name="target"/> under the entry of
/// <paramref name="source"/> for which <c>CompareName</c> returns zero.
/// </summary>
/// <remarks>
/// Both arrays are walked once with two forward-only indices, so the grouping is only
/// meaningful when both inputs are in ascending order with respect to
/// <c>CompareName</c>. Targets that sort before the current source are skipped as
/// unmatched; the scan ends as soon as either array is exhausted.
/// </remarks>
/// <param name="source">Keys to match against.</param>
/// <param name="target">Candidates to distribute over the keys.</param>
/// <returns>
/// An array parallel to <paramref name="source"/>; element <c>i</c> lists the targets
/// matched to <c>source[i]</c> in their original order. Every element is a non-null
/// list, and the array is empty when <paramref name="source"/> is empty.
/// </returns>
static List<string>[] FindMatches(string[] source, string[] target)
{
    // One (initially empty) result list per source entry.
    List<string>[] matches = new List<string>[source.Length];
    for (int i = 0; i < matches.Length; i++)
    {
        matches[i] = new List<string>();
    }

    int s = 0;
    int t = 0;

    // Checking both bounds up front means an empty source array yields an empty
    // result instead of an IndexOutOfRangeException on source[0].
    while (s < source.Length && t < target.Length)
    {
        int m = CompareName(source[s], target[t]);
        if (m > 0)
        {
            // Target sorts after the current source: try the same target
            // against the next source.
            s++;
        }
        else
        {
            if (m == 0)
            {
                matches[s].Add(target[t]);
            }

            // m < 0: target sorts before the current source and is left unmatched.
            t++;
        }
    }

    return matches;
}

/// <summary>
/// Orders <paramref name="target"/> relative to the prefix <paramref name="source"/>
/// using an ordinal (character-code) comparison.
/// </summary>
/// <param name="source">The key; its full length is used as the prefix length.</param>
/// <param name="target">The candidate whose leading characters are compared.</param>
/// <returns>
/// Zero when <paramref name="target"/> starts with <paramref name="source"/>, a positive
/// value when the target sorts after the key, a negative value when it sorts before it.
/// Only the sign is meaningful.
/// </returns>
static int CompareName(string source, string target)
{
    // Whatever comparison you need here. This one compares at most source.Length leading
    // characters, so multi-character keys are honoured and empty strings do not throw
    // (indexing [0] on an empty string would).
    return string.CompareOrdinal(target, 0, source, 0, source.Length);
}

// Sample input: keys and the candidate strings to distribute over them.
string[] source = { "1", "2", "A", "B" };
string[] target = { "1 - new music", "1 / classic", "1 | pop", "2 edit", "2 no edit", "A - sing", "B (listen)" };

// matches[i] receives the consecutive targets that NameMatch accepts for source[i].
List<string>[] matches = new List<string>[source.Length];

// Cursor into target; it is shared by all sources and never moves backwards.
int targetIdx = 0;
for (int sourceIdx = 0; sourceIdx < source.Length; sourceIdx++) {
   List<string> bucket = new List<string>();
   matches[sourceIdx] = bucket;

   // Consume targets until the first one the current source does not accept;
   // that target stays at the cursor for the next source to try.
   for (; targetIdx < target.Length; targetIdx++) {
      if (!NameMatch(source[sourceIdx], target[targetIdx])) {
         break;
      }
      bucket.Add(target[targetIdx]);
   }
}

/// <summary>
/// Console demo: walks a list of sources and a list of targets once each and attaches
/// every target to the source whose key is a case-insensitive prefix of it. Targets that
/// fit no source are skipped. The single forward pass relies on both lists being in
/// ascending key order.
/// </summary>
class Program
{
    /// <summary>A source key together with the targets that were matched to it.</summary>
    public class Source
    {
        private readonly string key;

        /// <summary>Prefix that a target must start with to belong to this source.</summary>
        public string Key { get { return key; } }

        private readonly List<string> matches = new List<string>();

        /// <summary>Targets attached to this source so far, in the order they were seen.</summary>
        public List<string> Matches { get { return matches; } }

        public Source(string key)
        {
            this.key = key;
        }
    }

    static void Main(string[] args)
    {
        var sources = new List<Source> {new Source("A"), new Source("C"), new Source("D")};
        var targets = new List<string> { "A1", "A2", "B1", "C1", "C2", "C3", "D1", "D2", "D3", "E1" };

        // ixSource always points at the source AFTER currentSource.
        var ixSource = 0;
        var currentSource = sources[ixSource++];

        foreach (var target in targets)
        {
            var compare = CompareSourceAndTarget(currentSource, target);

            // The target sorts after the current key: step through the sources until one
            // matches, one sorts after the target, or the sources run out.
            while ((compare < 0) && (ixSource < sources.Count))
            {
                currentSource = sources[ixSource++];
                compare = CompareSourceAndTarget(currentSource, target);
            }

            if (compare == 0)
            {
                currentSource.Matches.Add(target);
            }
            else if (compare < 0)
            {
                // Every source has been tried and this target still sorts after the last
                // key, so (with sorted input) no later target can match either.
                break;
            }

            // compare > 0: the target sorts before the current key and fits no source; skip it.
        }

        foreach (var source in sources)
        {
            Console.WriteLine("source {0} had matches {1}", source.Key, String.Join(" ", source.Matches.ToArray()));
        }
    }

    /// <summary>
    /// Compares a source key with the leading characters of a target, ignoring case.
    /// </summary>
    /// <returns>
    /// Zero when <paramref name="target"/> starts with the key, a negative value when the
    /// key sorts before the target's prefix, a positive value when it sorts after it.
    /// </returns>
    private static int CompareSourceAndTarget(Source source, string target)
    {
        // The length-bounded overload compares at most Key.Length characters and copes with
        // targets shorter than the key, where Substring would throw.
        return String.Compare(source.Key, 0, target, 0, source.Key.Length, StringComparison.OrdinalIgnoreCase);
    }
}

ia = ib = 0;
// Single pass over both sorted sequences; each of the three outcomes is handled in
// exactly one place. The loop runs while either sequence still has elements.
while(ia < na || ib < nb){
  if (ib >= nb || (ia < na && A[ia] < B[ib])){
    // A[ia] is unmatched (B is exhausted, or B's current element is already larger)
    ia++;
  }
  else if (ia >= na || B[ib] < A[ia]){
    // B[ib] is unmatched (A is exhausted, or A's current element is already larger)
    ib++;
  }
  else {
    // A[ia] matches B[ib]
    ia++;
    ib++;
  }
}