C# 反向映射集合的性能_C#_Performance_Linq

C# 反向映射集合的性能

c# performance linq

C# 反向映射集合的性能,c#,performance,linq,C#,Performance,Linq,我有一个对象集合，其中每个对象都有一个唯一的字符串Id，任何其他对象都可以包含指向另一个对象的完全任意（多对一）“链接”。我还希望能够生成一个“使用图”，它是反向索引——给定任何一个对象，哪些其他对象直接链接到它或它的子对象？（此处“子对象”定义为具有匹配前缀Id的任何对象，因为Id是点路径表示法。）因此，Baz.Boz可能是链接到Foo.Bar的一个对象——然后使用图应该反映Foo和Foo.Bar（但不是Foo.Bob）都被Baz.Boz使用这是用于计算使用率映射的代码： // build

我有一个对象集合，其中每个对象都有一个唯一的字符串Id，任何其他对象都可以包含指向另一个对象的完全任意（多对一）“链接”。我还希望能够生成一个“使用图”，它是反向索引——给定任何一个对象，哪些其他对象直接链接到它或它的子对象？（此处“子对象”定义为具有匹配前缀Id的任何对象，因为Id是点路径表示法。）

因此，

Baz.Boz

可能是链接到

Foo.Bar

的一个对象——然后使用图应该反映

Foo

和

Foo.Bar

（但不是

Foo.Bob

）都被

Baz.Boz

使用

这是用于计算使用率映射的代码：

/// <summary>
/// Builds the usage map (reverse index): for every known Id, the links whose
/// <c>TargetId</c> starts with that Id, i.e. links that point at the object itself
/// or at one of its children (children share the parent's Id as an ordinal prefix).
/// </summary>
/// <returns>
/// A dictionary with one entry per Id returned by <c>All()</c>. Ids that nothing
/// links to map to an empty list. Within each list, links keep the order in which
/// they are found while walking <c>All().Values</c>.
/// </returns>
public IDictionary<string, IList<Link>> CalculateUsageMap()
{
    var all = All();

    // Seed every id with an empty list so unused objects still get an entry.
    // The values are typed as IList<Link> so the result matches the declared
    // return type (a Dictionary<string, List<Link>> is not convertible to it).
    var usage = new Dictionary<string, IList<Link>>(all.Count);
    foreach (var id in all.Keys)
    {
        usage.Add(id, new List<Link>());
    }

    // Rather than testing every (id, link) pair, enumerate the non-empty prefixes
    // of each link target and look each one up. An id matches a link exactly when
    // the id equals one of those prefixes, so the result is the same as an ordinal
    // StartsWith scan, at a cost proportional to the target length per link.
    foreach (var owner in all.Values)
    {
        foreach (var link in owner.Links ?? Enumerable.Empty<Link>())
        {
            var target = link.TargetId;
            if (string.IsNullOrEmpty(target))
            {
                continue;
            }

            for (var length = 1; length <= target.Length; length++)
            {
                if (usage.TryGetValue(target.Substring(0, length), out var users))
                {
                    users.Add(link);
                }
            }
        }
    }

    return usage;
}

/// <summary>
/// Tells whether <paramref name="link"/> targets <paramref name="candidateId"/> or
/// something below it, judged by an ordinal prefix match on the link's target id.
/// </summary>
/// <param name="link">Link whose <c>TargetId</c> is examined.</param>
/// <param name="candidateId">Id that may be a prefix of the link's target id.</param>
/// <returns>
/// <c>true</c> when both ids are non-empty and the target id starts with
/// <paramref name="candidateId"/>; otherwise <c>false</c>.
/// </returns>
private static bool IsLinkedTo(Link link, string candidateId)
{
    var targetId = link.TargetId;
    if (string.IsNullOrEmpty(targetId) || string.IsNullOrEmpty(candidateId))
    {
        return false;
    }

    return targetId.StartsWith(candidateId, StringComparison.Ordinal);
}

// builds Id => links that link to Id or one of Id's children (by prefix)
public IDictionary<string, IList<Link>> CalculateUsageMap()
{
    var all = All();
    var links = all.Values
        .SelectMany(o => o.Links ?? Enumerable.Empty<Link>())
        .ToList();
    return all.Keys.ToDictionary(i => i, i => links.Where(k => IsLinkedTo(k, i)).ToList());
    // this last line is very slow
}

private static bool IsLinkedTo(Link link, string candidateId)
{
    return !string.IsNullOrEmpty(link.TargetId)
        && !string.IsNullOrEmpty(candidateId)
        && link.TargetId.StartsWith(candidateId, StringComparison.Ordinal);
}

这是它背后的支撑结构：

/// <summary>
/// An object in the tree that can own links and be the target of links.
/// </summary>
public interface ILinkable
{
    /// <summary>Unique string id of the object, written in dot-path notation.</summary>
    string Id { get; }
    /// <summary>Direct children; callers in this file treat <c>null</c> as "no children".</summary>
    IEnumerable<ILinkable> Children { get; }
    /// <summary>Links owned by this object; callers in this file treat <c>null</c> as "no links".</summary>
    IEnumerable<Link> Links { get; }
}

/// <summary>
/// A link from one object (<see cref="Source"/>) to another object identified by
/// <see cref="TargetId"/>.
/// </summary>
public class Link
{
    /// <summary>Name of the link.</summary>
    public string Name { get; }
    /// <summary>The object that owns this link.</summary>
    public ILinkable Source { get; } // our immediate owner
    /// <summary>Id of the object this link points at; the usage map matches it by prefix.</summary>
    public string TargetId { get; }
    // plus constructors etc that's irrelevant at present
}

/// <summary>Root of the object tree; <c>All()</c> starts its traversal here.</summary>
public ILinkable Root { get; }

/// <summary>
/// Flattens the tree rooted at <c>Root</c> into a lookup keyed by Id.
/// </summary>
/// <returns>A new dictionary holding the root and every descendant, keyed by Id.</returns>
public IDictionary<string, ILinkable> All()
{
    IDictionary<string, ILinkable> byId = new Dictionary<string, ILinkable>();
    AddWithDescendants(byId, Root);
    return byId;
}

/// <summary>
/// Registers <paramref name="obj"/> under its Id, then does the same for each of its
/// children, depth-first (parent before children).
/// </summary>
/// <param name="tree">Dictionary that receives the entries.</param>
/// <param name="obj">Object to register together with its descendants.</param>
private static void AddWithDescendants(IDictionary<string, ILinkable> tree, ILinkable obj)
{
    tree.Add(obj.Id, obj);

    var children = obj.Children;
    if (children == null)
    {
        // A null child collection means the same as an empty one.
        return;
    }

    foreach (var descendant in children)
    {
        AddWithDescendants(tree, descendant);
    }
}

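public interface ILinkable
{
    string Id { get; }
    IEnumerable<ILinkable> Children { get; }
    IEnumerable<Link> Links { get; }
}

public class Link
{
    public string Name { get; }
    public ILinkable Source { get; } // our immediate owner
    public string TargetId { get; }
    // plus constructors etc that's irrelevant at present
}

public ILinkable Root { get; }

public IDictionary<string, ILinkable> All()
{
    var tree = new Dictionary<string, ILinkable>();
    AddWithDescendants(tree, Root);
    return tree;
}

private static void AddWithDescendants(IDictionary<string, ILinkable> tree, ILinkable obj)
{
    tree.Add(obj.Id, obj);

    foreach (var child in obj.Children ?? Enumerable.Empty<ILinkable>())
    {
        AddWithDescendants(tree, child);
    }
}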

这是可行的，但是在一个有14k个对象和3k个链接（产生20k个使用）的树中，这需要5s才能生成，这比我想要的要长。（我已经检查了

All（）

并计算了

链接

基本上不需要时间；所有这些都花在

ToDictionary

中）

有什么方法可以提高这一行的性能吗？我的第一个想法是使用类似于

GroupJoin

的东西，但是由于我们是在前缀相等而不是实际相等的基础上“加入”的，所以这并没有真正起作用。我更愿意将其保存在纯代码中，而不涉及数据库

（我曾尝试为

GroupJoin

编写一个自定义相等比较器，但结果既慢又错误，使用量输出只有~7k。而且这是一个可疑的匹配，因为这是一个不对称匹配，而相等比较器假设对称。）

此代码的时间复杂性

return all.Keys.ToDictionary(i => i, i => links.Where(k => IsLinkedTo(k, i)).ToList());

是二次的

O（N*M）

，其中 N

是

all.Keys.Count

，M

是

links.Count

。因此，毫不奇怪，它是缓慢的

由于您试图实现的基本目标是找到所有

ILinkable.Id

都是

Link.TargetId

的前缀，因此您需要一种为此类操作优化的高效数据结构。这种数据结构是存在的，它被称为前缀树（Trie）。以下是针对您的情况的一个快速实现：

/// <summary>
/// Read-only usage map backed by a character-level prefix tree (trie). Every item Id
/// is stored as a path of characters, and every link is attached to each item found
/// along the path spelled by its <c>TargetId</c>. Enumerating the map yields
/// (item Id, links that use the item or something below it) pairs.
/// </summary>
/// <remarks>
/// Relies on an <c>Expand</c> extension method that is not part of this snippet; it
/// is assumed to flatten a hierarchy given a child selector (TODO confirm against
/// its definition).
/// </remarks>
class ItemMap : IReadOnlyCollection<KeyValuePair<string, IReadOnlyList<Link>>>
{
    /// <summary>One trie node, representing a single character of an Id path.</summary>
    class Node
    {
        public Node(char key) => Key = key;
        /// <summary>Character under which this node is stored in its parent's child map.</summary>
        public char Key { get; }
        // A field rather than a property: the trie walk takes a ref to it, and
        // NodeMap is a mutable struct that must not be copied.
        public NodeMap Children;
        /// <summary>Item whose Id ends at this node; null for a purely intermediate node.</summary>
        public ILinkable Item;
        /// <summary>Links recorded for this node; allocated on first use by AddLink.</summary>
        public List<Link> Links;
        /// <summary>
        /// The recorded links; an empty list for an item nothing links to; null when
        /// the node carries no item.
        /// </summary>
        public IReadOnlyList<Link> ItemLinks => Links ?? (Item != null ? NoLinks : null);
        public static IReadOnlyList<Link> NoLinks => Array.Empty<Link>();
    }

    /// <summary>
    /// Child lookup of a node, keyed by character. The dictionary is created only
    /// when the first child is added.
    /// </summary>
    struct NodeMap
    {
        Dictionary<char, Node> items;
        /// <summary>The child nodes, or null when no child was ever added.</summary>
        public IEnumerable<Node> Items => items?.Values;
        /// <summary>
        /// Looks up the child stored under <paramref name="key"/>, optionally creating it.
        /// </summary>
        /// <returns>true when <paramref name="item"/> is set (found or just created).</returns>
        public bool TryGetItem(char key, out Node item, bool create = false)
        {
            item = null;
            if ((items == null || !items.TryGetValue(key, out item)) && create)
                (items ?? (items = new Dictionary<char, Node>())).Add(key, item = new Node(key));
            return item != null;
        }
    }

    NodeMap RootNodes;

    // Every node of the trie; empty when nothing was added.
    IEnumerable<Node> Nodes
        => RootNodes.Items?.Expand(e => e.Children.Items) ?? Enumerable.Empty<Node>();

    // Only the nodes at which an item's Id ends.
    IEnumerable<Node> ItemNodes
        => Nodes.Where(n => n.Item != null);

    IEnumerable<KeyValuePair<string, IReadOnlyList<Link>>> Items
        => ItemNodes.Select(n => new KeyValuePair<string, IReadOnlyList<Link>>(n.Item.Id, n.ItemLinks));

    /// <summary>
    /// Builds the map from <paramref name="tree"/> and its descendants: all items are
    /// inserted first, then every link of every item is distributed over the trie.
    /// A null tree produces an empty map.
    /// </summary>
    /// <exception cref="ArgumentException">Two items share the same Id.</exception>
    public ItemMap(ILinkable tree)
    {
        if (tree == null) return;
        var items = new[] { tree }.Expand(e => e.Children);
        foreach (var item in items)
            AddItem(item);
        // AddLink never creates nodes, so the trie structure stays stable while
        // this lazy query is being enumerated.
        var links = Nodes.Where(n => n.Item?.Links != null).SelectMany(n => n.Item.Links);
        foreach (var link in links)
            AddLink(link);
    }

    // Registers the item at the node spelled by its Id. Items with a null or
    // empty Id are skipped because GetNode returns null for them.
    void AddItem(ILinkable item)
    {
        var node = GetNode(item.Id, create: true);
        if (node == null) return;
        // A specific exception type instead of the bare System.Exception; the
        // message is unchanged and callers catching Exception still catch it.
        if (node.Item != null) throw new ArgumentException($"Duplicate key: {item.Id}");
        node.Item = item;
        Count++;
    }

    // Walks the trie along link.TargetId and records the link on every item met
    // on the way, i.e. every item whose Id is a prefix of the target id.
    void AddLink(Link link)
    {
        var key = link.TargetId;
        if (string.IsNullOrEmpty(key)) return;
        ref var nodes = ref RootNodes;
        for (int i = 0; i < key.Length; i++)
        {
            // No node for this character: nothing deeper can match either.
            if (!nodes.TryGetItem(key[i], out var node)) break;
            // Add to each item in the prefix path, except the link's own source.
            if (node.Item != null && node.Item != link.Source)
                (node.Links ?? (node.Links = new List<Link>())).Add(link);
            nodes = ref node.Children;
        }
    }

    // Returns the node spelled by key. With create: true, missing nodes are added
    // along the way. Returns null for a null/empty key, or when the path does not
    // exist and create is false.
    Node GetNode(string key, bool create = false)
    {
        if (string.IsNullOrEmpty(key)) return null;
        Node node = null;
        ref var nodes = ref RootNodes;
        for (int i = 0; i < key.Length; i++)
        {
            // On a miss TryGetItem leaves node null, which is then returned.
            if (!nodes.TryGetItem(key[i], out node, create)) break;
            nodes = ref node.Children;
        }
        return node;
    }

    /// <summary>Number of items registered in the map.</summary>
    public int Count { get; private set; }

    public IEnumerator<KeyValuePair<string, IReadOnlyList<Link>>> GetEnumerator() => Items.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}

但请注意，这甚至不是必需的，因为前缀树可以有效地用作字典。实现只读字典接口非常简单-将类声明更改为

class ItemMap : IReadOnlyDictionary<string, IReadOnlyList<Link>>

上述前缀树实现使用

字典

存储/遍历子节点列表。这可能会占用更多内存。由于整个实现都封装在

NodeMap

中，因此您可以试验不同的数据结构，并测量性能和内存使用情况

例如，以下是另一个实现，它使用

null

、单个

Node

或排序的

List<Node>

作为存储，并使用二分查找在排序的列表中按键定位

Node

：

/// <summary>
/// Child-node storage that avoids a dictionary: the backing field is null when there
/// are no children, a single <c>Node</c> for one child, or a <c>List&lt;Node&gt;</c>
/// kept sorted by <c>Key</c> for several.
/// </summary>
struct NodeMap
{
    object items; // null, Node or sorted List<Node>

    /// <summary>The stored children, or null when nothing is stored.</summary>
    public IEnumerable<Node> Items
    {
        get
        {
            if (items is Node single) return new[] { single };
            return items as IEnumerable<Node>;
        }
    }

    /// <summary>
    /// Finds the child stored under <paramref name="key"/>; when it is missing and
    /// <paramref name="create"/> is true, a new node is inserted in key order.
    /// </summary>
    /// <returns>true when <paramref name="item"/> is set (found or just created).</returns>
    public bool TryGetItem(char key, out Node item, bool create = false)
    {
        // Empty storage: the first child is stored directly, without a list.
        if (items == null)
        {
            if (!create)
            {
                item = null;
                return false;
            }
            item = new Node(key);
            items = item;
            return true;
        }

        // Exactly one child: either it matches, or storage is upgraded to a
        // two-element list in ascending key order.
        if (items is Node single)
        {
            if (single.Key == key)
            {
                item = single;
                return true;
            }
            if (!create)
            {
                item = null;
                return false;
            }
            item = new Node(key);
            if (single.Key < key)
                items = new List<Node>(2) { single, item };
            else
                items = new List<Node>(2) { item, single };
            return true;
        }

        // Several children: binary search over the sorted list.
        var sorted = (List<Node>)items;
        int low = 0, high = sorted.Count - 1;
        while (low <= high)
        {
            int middle = low + ((high - low) >> 1);
            var probe = sorted[middle];
            if (probe.Key == key)
            {
                item = probe;
                return true;
            }
            if (probe.Key < key)
                low = middle + 1;
            else
                high = middle - 1;
        }

        if (!create)
        {
            item = null;
            return false;
        }

        // After an unsuccessful search, low is the position that keeps the list sorted.
        item = new Node(key);
        sorted.Insert(low, item);
        return true;
    }
}

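struct NodeMap
{
    object items; // null, Node or sorted List<Node>
    public IEnumerable<Node> Items => items is Node node ? new[] { node } : items as IEnumerable<Node>;
    public bool TryGetItem(char key, out Node item, bool create = false)
    {
        item = null;
        if (items == null)
        {
            if (create) items = item = new Node(key);
        }
        else if (items is Node node)
        {
            if (node.Key == key) item = node;
            else if (create) items = node.Key < key ? new List<Node>(2) { node, (item = new Node(key)) } : new List<Node>(2) { (item = new Node(key)), node };
        }
        else
        {
            var nodeList = (List<Node>)items;
            int lo = 0, hi = nodeList.Count - 1;
            while (lo <= hi)
            {
                int mid = lo + ((hi - lo) >> 1);
                node = nodeList[mid];
                if (node.Key == key) { item = node; break; }
                if (node.Key < key) lo = mid + 1; else hi = mid - 1;
            }
            if (item == null && create) nodeList.Insert(lo, item = new Node(key));
        }
        return item != null;
    }
}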

此代码的时间复杂性

return all.Keys.ToDictionary(i => i, i => links.Where(k => IsLinkedTo(k, i)).ToList());

是二次的

O（N*M）

，其中 N

是

all.Keys.Count

，M

是

links.Count

。因此，毫不奇怪，它是缓慢的

由于您试图实现的基本目标是找到所有

ILinkable.Id

都是

Link.TargetId

的前缀，因此您需要一种为此类操作优化的高效数据结构。这种数据结构是存在的，它被称为前缀树（Trie）。以下是针对您的情况的一个快速实现：

/// <summary>
/// Read-only usage map backed by a character-level prefix tree (trie). Every item Id
/// is stored as a path of characters, and every link is attached to each item found
/// along the path spelled by its <c>TargetId</c>. Enumerating the map yields
/// (item Id, links that use the item or something below it) pairs.
/// </summary>
/// <remarks>
/// Relies on an <c>Expand</c> extension method that is not part of this snippet; it
/// is assumed to flatten a hierarchy given a child selector (TODO confirm against
/// its definition).
/// </remarks>
class ItemMap : IReadOnlyCollection<KeyValuePair<string, IReadOnlyList<Link>>>
{
    /// <summary>One trie node, representing a single character of an Id path.</summary>
    class Node
    {
        public Node(char key) => Key = key;
        /// <summary>Character under which this node is stored in its parent's child map.</summary>
        public char Key { get; }
        // A field rather than a property: the trie walk takes a ref to it, and
        // NodeMap is a mutable struct that must not be copied.
        public NodeMap Children;
        /// <summary>Item whose Id ends at this node; null for a purely intermediate node.</summary>
        public ILinkable Item;
        /// <summary>Links recorded for this node; allocated on first use by AddLink.</summary>
        public List<Link> Links;
        /// <summary>
        /// The recorded links; an empty list for an item nothing links to; null when
        /// the node carries no item.
        /// </summary>
        public IReadOnlyList<Link> ItemLinks => Links ?? (Item != null ? NoLinks : null);
        public static IReadOnlyList<Link> NoLinks => Array.Empty<Link>();
    }

    /// <summary>
    /// Child lookup of a node, keyed by character. The dictionary is created only
    /// when the first child is added.
    /// </summary>
    struct NodeMap
    {
        Dictionary<char, Node> items;
        /// <summary>The child nodes, or null when no child was ever added.</summary>
        public IEnumerable<Node> Items => items?.Values;
        /// <summary>
        /// Looks up the child stored under <paramref name="key"/>, optionally creating it.
        /// </summary>
        /// <returns>true when <paramref name="item"/> is set (found or just created).</returns>
        public bool TryGetItem(char key, out Node item, bool create = false)
        {
            item = null;
            if ((items == null || !items.TryGetValue(key, out item)) && create)
                (items ?? (items = new Dictionary<char, Node>())).Add(key, item = new Node(key));
            return item != null;
        }
    }

    NodeMap RootNodes;

    // Every node of the trie; empty when nothing was added.
    IEnumerable<Node> Nodes
        => RootNodes.Items?.Expand(e => e.Children.Items) ?? Enumerable.Empty<Node>();

    // Only the nodes at which an item's Id ends.
    IEnumerable<Node> ItemNodes
        => Nodes.Where(n => n.Item != null);

    IEnumerable<KeyValuePair<string, IReadOnlyList<Link>>> Items
        => ItemNodes.Select(n => new KeyValuePair<string, IReadOnlyList<Link>>(n.Item.Id, n.ItemLinks));

    /// <summary>
    /// Builds the map from <paramref name="tree"/> and its descendants: all items are
    /// inserted first, then every link of every item is distributed over the trie.
    /// A null tree produces an empty map.
    /// </summary>
    /// <exception cref="ArgumentException">Two items share the same Id.</exception>
    public ItemMap(ILinkable tree)
    {
        if (tree == null) return;
        var items = new[] { tree }.Expand(e => e.Children);
        foreach (var item in items)
            AddItem(item);
        // AddLink never creates nodes, so the trie structure stays stable while
        // this lazy query is being enumerated.
        var links = Nodes.Where(n => n.Item?.Links != null).SelectMany(n => n.Item.Links);
        foreach (var link in links)
            AddLink(link);
    }

    // Registers the item at the node spelled by its Id. Items with a null or
    // empty Id are skipped because GetNode returns null for them.
    void AddItem(ILinkable item)
    {
        var node = GetNode(item.Id, create: true);
        if (node == null) return;
        // A specific exception type instead of the bare System.Exception; the
        // message is unchanged and callers catching Exception still catch it.
        if (node.Item != null) throw new ArgumentException($"Duplicate key: {item.Id}");
        node.Item = item;
        Count++;
    }

    // Walks the trie along link.TargetId and records the link on every item met
    // on the way, i.e. every item whose Id is a prefix of the target id.
    void AddLink(Link link)
    {
        var key = link.TargetId;
        if (string.IsNullOrEmpty(key)) return;
        ref var nodes = ref RootNodes;
        for (int i = 0; i < key.Length; i++)
        {
            // No node for this character: nothing deeper can match either.
            if (!nodes.TryGetItem(key[i], out var node)) break;
            // Add to each item in the prefix path, except the link's own source.
            if (node.Item != null && node.Item != link.Source)
                (node.Links ?? (node.Links = new List<Link>())).Add(link);
            nodes = ref node.Children;
        }
    }

    // Returns the node spelled by key. With create: true, missing nodes are added
    // along the way. Returns null for a null/empty key, or when the path does not
    // exist and create is false.
    Node GetNode(string key, bool create = false)
    {
        if (string.IsNullOrEmpty(key)) return null;
        Node node = null;
        ref var nodes = ref RootNodes;
        for (int i = 0; i < key.Length; i++)
        {
            // On a miss TryGetItem leaves node null, which is then returned.
            if (!nodes.TryGetItem(key[i], out node, create)) break;
            nodes = ref node.Children;
        }
        return node;
    }

    /// <summary>Number of items registered in the map.</summary>
    public int Count { get; private set; }

    public IEnumerator<KeyValuePair<string, IReadOnlyList<Link>>> GetEnumerator() => Items.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}

但是请注意，这甚至不是必需的，因为前缀树可以有效地用作字典。

/// <summary>
/// Child-node storage that avoids a dictionary: the backing field is null when there
/// are no children, a single <c>Node</c> for one child, or a <c>List&lt;Node&gt;</c>
/// kept sorted by <c>Key</c> for several.
/// </summary>
struct NodeMap
{
    object items; // null, Node or sorted List<Node>

    /// <summary>The stored children, or null when nothing is stored.</summary>
    public IEnumerable<Node> Items
    {
        get
        {
            if (items is Node single) return new[] { single };
            return items as IEnumerable<Node>;
        }
    }

    /// <summary>
    /// Finds the child stored under <paramref name="key"/>; when it is missing and
    /// <paramref name="create"/> is true, a new node is inserted in key order.
    /// </summary>
    /// <returns>true when <paramref name="item"/> is set (found or just created).</returns>
    public bool TryGetItem(char key, out Node item, bool create = false)
    {
        // Empty storage: the first child is stored directly, without a list.
        if (items == null)
        {
            if (!create)
            {
                item = null;
                return false;
            }
            item = new Node(key);
            items = item;
            return true;
        }

        // Exactly one child: either it matches, or storage is upgraded to a
        // two-element list in ascending key order.
        if (items is Node single)
        {
            if (single.Key == key)
            {
                item = single;
                return true;
            }
            if (!create)
            {
                item = null;
                return false;
            }
            item = new Node(key);
            if (single.Key < key)
                items = new List<Node>(2) { single, item };
            else
                items = new List<Node>(2) { item, single };
            return true;
        }

        // Several children: binary search over the sorted list.
        var sorted = (List<Node>)items;
        int low = 0, high = sorted.Count - 1;
        while (low <= high)
        {
            int middle = low + ((high - low) >> 1);
            var probe = sorted[middle];
            if (probe.Key == key)
            {
                item = probe;
                return true;
            }
            if (probe.Key < key)
                low = middle + 1;
            else
                high = middle - 1;
        }

        if (!create)
        {
            item = null;
            return false;
        }

        // After an unsuccessful search, low is the position that keeps the list sorted.
        item = new Node(key);
        sorted.Insert(low, item);
        return true;
    }
}

/// <summary>
/// Child-node storage keyed by character: the backing field is null when there are no
/// children, a single <c>Node</c> for one child, or a <c>List&lt;Node&gt;</c> kept
/// sorted by <c>Key</c> and searched with <c>List&lt;T&gt;.BinarySearch</c>.
/// </summary>
struct NodeMap
{
    object items; // null, Node or sorted List<Node>

    // SelectComparer is an extension method defined outside this snippet; presumably
    // it compares nodes by the selected Key. Confirm against its definition.
    private static readonly IComparer<Node> NodeComparer
        = Comparer<char>.Default.SelectComparer((Node n) => n.Key);

    /// <summary>The stored children, or null when nothing is stored.</summary>
    public IEnumerable<Node> Items
    {
        get
        {
            if (items is Node single) return new[] { single };
            return items as IEnumerable<Node>;
        }
    }

    /// <summary>
    /// Finds the child stored under <paramref name="key"/>; when it is missing and
    /// <paramref name="create"/> is true, a new node is inserted in key order.
    /// </summary>
    /// <returns>true when <paramref name="item"/> is set (found or just created).</returns>
    public bool TryGetItem(char key, out Node item, bool create = false)
    {
        // Empty storage: the first child is stored directly, without a list.
        if (items == null)
        {
            if (!create)
            {
                item = null;
                return false;
            }
            item = new Node(key);
            items = item;
            return true;
        }

        // Exactly one child: either it matches, or storage is upgraded to a
        // two-element list in ascending key order.
        if (items is Node single)
        {
            if (single.Key == key)
            {
                item = single;
                return true;
            }
            if (!create)
            {
                item = null;
                return false;
            }
            item = new Node(key);
            if (single.Key < key)
                items = new List<Node>(2) { single, item };
            else
                items = new List<Node>(2) { item, single };
            return true;
        }

        // Several children: the probe node doubles as the search key and, when the
        // key is absent and creation is requested, as the node that gets inserted.
        var sorted = (List<Node>)items;
        var probe = new Node(key);
        var position = sorted.BinarySearch(probe, NodeComparer);
        if (position >= 0)
        {
            item = sorted[position];
            return true;
        }
        if (!create)
        {
            item = null;
            return false;
        }

        // A negative result is the bitwise complement of the insertion index.
        item = probe;
        sorted.Insert(~position, item);
        return true;
    }
}

/// <summary>
/// Child-node storage keyed by string: the backing field is null when there are no
/// children, a single <c>Node</c> for one child, or a <c>List&lt;Node&gt;</c> kept in
/// ordinal <c>Key</c> order and searched with <c>List&lt;T&gt;.BinarySearch</c>.
/// </summary>
struct NodeMap
{
    object items; // null, Node or sorted List<Node>

    // SelectComparer is an extension method defined outside this snippet; presumably
    // it compares nodes by the selected Key. Confirm against its definition.
    private static readonly IComparer<Node> NodeComparer
        = StringComparer.Ordinal.SelectComparer((Node n) => n.Key);

    /// <summary>The stored children, or null when nothing is stored.</summary>
    public IEnumerable<Node> Items
    {
        get
        {
            if (items is Node single) return new[] { single };
            return items as IEnumerable<Node>;
        }
    }

    /// <summary>
    /// Finds the child stored under <paramref name="key"/>; when it is missing and
    /// <paramref name="create"/> is true, a new node is inserted in key order.
    /// </summary>
    /// <returns>true when <paramref name="item"/> is set (found or just created).</returns>
    public bool TryGetItem(string key, out Node item, bool create = false)
    {
        // Empty storage: the first child is stored directly, without a list.
        if (items == null)
        {
            if (!create)
            {
                item = null;
                return false;
            }
            item = new Node(key);
            items = item;
            return true;
        }

        // Exactly one child: either it matches, or storage is upgraded to a
        // two-element list in ascending ordinal key order.
        if (items is Node single)
        {
            if (single.Key == key)
            {
                item = single;
                return true;
            }
            if (!create)
            {
                item = null;
                return false;
            }
            var existingSortsFirst = StringComparer.Ordinal.Compare(single.Key, key) < 0;
            item = new Node(key);
            if (existingSortsFirst)
                items = new List<Node>(2) { single, item };
            else
                items = new List<Node>(2) { item, single };
            return true;
        }

        // Several children: the probe node doubles as the search key and, when the
        // key is absent and creation is requested, as the node that gets inserted.
        var sorted = (List<Node>)items;
        var probe = new Node(key);
        var position = sorted.BinarySearch(probe, NodeComparer);
        if (position >= 0)
        {
            item = sorted[position];
            return true;
        }
        if (!create)
        {
            item = null;
            return false;
        }

        // A negative result is the bitwise complement of the insertion index.
        item = probe;
        sorted.Insert(~position, item);
        return true;
    }
}

foreach (var segment in key.Split('.'))