Python 创建；“最小连接”；有向无环图_Python_Python 3.x_Networkx_Directed Acyclic Graphs

Python：创建“最小连通”的有向无环图

python python-3.x

Python 创建；“最小连接”；有向无环图,python,python-3.x,networkx,directed-acyclic-graphs,Python,Python 3.x,Networkx,Directed Acyclic Graphs,我在NetworkX中有一个有向无环简单图现在，对于每条边，该边都有一个“源”和一个“目标”。如果除了此边之外还有一条从“源”到“目标”的路径，那么我想删除此边 NetworkX是否有内置函数来执行此操作我真的不想重新发明轮子 [可选]只有在1的答案为“否”的情况下，实现这一点的最有效算法是什么（对于相当密集的图形）以下是需要清洁的DAG示例：这些节点是： ['termsequence', 'maximumdegree', 'emptymultigraph', 'minimum', '

我在NetworkX中有一个有向无环简单图

现在，对于每条边，该边都有一个“源”和一个“目标”。如果除了此边之外还有一条从“源”到“目标”的路径，那么我想删除此边

NetworkX是否有内置函数来执行此操作

我真的不想重新发明轮子

[可选]只有在1的答案为“否”的情况下，实现这一点的最有效算法是什么（对于相当密集的图形）

以下是需要清洁的DAG示例：

这些节点是：

['termsequence', 'maximumdegree', 'emptymultigraph', 'minimum', 'multiset', 'walk', 'nonemptymultigraph', 'euleriantrail', 'nonnullmultigraph', 'cycle', 'loop', 'abwalk', 'endvertices', 'simplegraph', 'vertex', 'multipletrails', 'edge', 'set', 'stroll', 'union', 'trailcondition', 'nullmultigraph', 'trivialmultigraph', 'sequence', 'multiplepaths', 'path', 'degreevertex', 'onedgesonvertices', 'nontrivialmultigraph', 'adjacentedges', 'adjacentvertices', 'simpleedge', 'maximum', 'multipleloops', 'length', 'circuit', 'class', 'euleriangraph', 'incident', 'minimumdegree', 'orderedpair', 'unique', 'closedwalk', 'multipleedges', 'pathcondition', 'multigraph', 'trail']

这些边是：

[('termsequence', 'endvertices'), ('emptymultigraph', 'nonemptymultigraph'), ('minimum', 'minimumdegree'), ('multiset', 'trailcondition'), ('multiset', 'pathcondition'), ('multiset', 'multigraph'), ('walk', 'length'), ('walk', 'closedwalk'), ('walk', 'abwalk'), ('walk', 'trail'), ('walk', 'endvertices'), ('euleriantrail', 'euleriangraph'), ('loop', 'simplegraph'), ('loop', 'degreevertex'), ('loop', 'simpleedge'), ('loop', 'multipleloops'), ('endvertices', 'abwalk'), ('vertex', 'adjacentvertices'), ('vertex', 'onedgesonvertices'), ('vertex', 'walk'), ('vertex', 'adjacentedges'), ('vertex', 'multipleedges'), ('vertex', 'edge'), ('vertex', 'multipleloops'), ('vertex', 'degreevertex'), ('vertex', 'incident'), ('edge', 'adjacentvertices'), ('edge', 'onedgesonvertices'), ('edge', 'multipleedges'), ('edge', 'simpleedge'), ('edge', 'adjacentedges'), ('edge', 'loop'), ('edge', 'trailcondition'), ('edge', 'pathcondition'), ('edge', 'walk'), ('edge', 'incident'), ('set', 'onedgesonvertices'), ('set', 'edge'), ('union', 'multiplepaths'), ('union', 'multipletrails'), ('trailcondition', 'trail'), ('nullmultigraph', 'nonnullmultigraph'), ('sequence', 'walk'), ('sequence', 'endvertices'), ('path', 'cycle'), ('path', 'multiplepaths'), ('degreevertex', 'maximumdegree'), ('degreevertex', 'minimumdegree'), ('onedgesonvertices', 'multigraph'), ('maximum', 'maximumdegree'), ('circuit', 'euleriangraph'), ('class', 'multiplepaths'), ('class', 'multipletrails'), ('incident', 'adjacentedges'), ('incident', 'degreevertex'), ('incident', 'onedgesonvertices'), ('orderedpair', 'multigraph'), ('closedwalk', 'circuit'), ('closedwalk', 'cycle'), ('closedwalk', 'stroll'), ('pathcondition', 'path'), ('multigraph', 'euleriangraph'), ('multigraph', 'nullmultigraph'), ('multigraph', 'trivialmultigraph'), ('multigraph', 'nontrivialmultigraph'), ('multigraph', 'emptymultigraph'), ('multigraph', 'euleriantrail'), ('multigraph', 'simplegraph'), ('trail', 'path'), ('trail', 'circuit'), ('trail', 
'multipletrails')]

使用 `nx.all_simple_paths`：

def multiple_paths(G,source,target):
    '''Tell whether more than one simple path leads from source to target.

    The path generator is consumed lazily and abandoned as soon as a second
    path shows up, so at most two paths are ever produced.

    Returns True when at least two simple paths exist, False when the
    generator yields zero or one path.
    '''
    found = 0
    for _ in nx.all_simple_paths(G, source=source, target=target):
        found += 1
        if found > 1:  # a second path exists; no need to enumerate the rest
            return True
    return False

import networkx as nx
G=nx.DiGraph()
G.add_edges_from([(0,1), (1,2), (1,3), (0,3), (2,3)])
multiple_paths(G,0,1)
> False
multiple_paths(G,0,2) 
> False
multiple_paths(G,0,3)
> True

# Drop every edge whose endpoints are also joined by some other path.
# The edge list is snapshotted with list() first: G.remove_edge() mutates the
# adjacency structure, and iterating the live edge collection while doing so
# raises "dictionary changed size during iteration" on Python 3.
# G.edges() (unlike G.edges_iter()) exists in both NetworkX 1.x and 2.x+.
for source, target in list(G.edges()):  #let's do what you're trying to do
    if multiple_paths(G, source, target):
        G.remove_edge(source, target)

G.edges()
> [(0, 1), (1, 2), (2, 3)]

使用 `nx.has_path`：

import networkx as nx
G=nx.DiGraph()
G.add_edges_from([(0,1), (1,2), (1,3), (0,3), (2,3)])
# For each edge: take it out, and put it back only if its endpoints are no
# longer connected without it (i.e. the edge was not redundant).
# Iterate over a snapshot, because the loop body removes and re-adds edges
# and the live edge collection must not change size while being iterated.
# G.edges() (unlike G.edges_iter()) exists in both NetworkX 1.x and 2.x+.
for source, target in list(G.edges()):
    G.remove_edge(source, target)
    if not nx.has_path(G, source, target):
        # NOTE: the edge is re-added without attributes, as in the original.
        G.add_edge(source, target)

G.edges()
> [(0, 1), (1, 2), (2, 3)]

import networkx as nx
from collections import defaultdict

def remove_redundant_edges(G):
    """Remove, in place, each edge of ``G`` that is bypassed by a longer path.

    Works bottom-up: nodes without children are handled first, and a node is
    queued only once all of its children have been handled, so by the time a
    node is visited the descendants reachable *through* its children are
    already known.  A direct child that is also such an indirect descendant
    makes the edge to it redundant, and that edge is removed.

    ``G`` is expected to be a directed acyclic graph: a node lying on a cycle
    never has all of its children processed and is therefore never visited.

    Returns None; ``G`` is modified in place.
    """
    # Number of already-processed children per node; when it reaches the
    # node's out-degree the node itself becomes ready for processing.
    processed_child_count = defaultdict(int)
    # All known descendants of a node (children included once it is visited).
    descendants = defaultdict(set)
    # Out-degrees are captured once, before any edge is removed.
    # G.nodes() is used instead of nodes_iter(), which NetworkX 2.0 dropped.
    out_degree = {node: G.out_degree(node) for node in G.nodes()}
    # Start from the nodes that have no children at all.
    nodes_to_process = [node for node in G.nodes() if out_degree[node] == 0]
    while nodes_to_process:
        next_nodes = []
        for node in nodes_to_process:
            # On entry, descendants[node] holds everything reachable through
            # node's children, but not the direct children themselves.
            # Snapshot the children: edges of `node` are removed in the loop,
            # and neighbors() is a live iterator on NetworkX >= 2.0.
            for child in list(G.neighbors(node)):
                if child in descendants[node]:
                    # Already reachable indirectly: the direct edge is redundant.
                    G.remove_edge(node, child)
                else:
                    descendants[node].add(child)
            # Propagate the now-complete descendant set to every parent.
            for predecessor in G.predecessors(node):
                descendants[predecessor].update(descendants[node])
                processed_child_count[predecessor] += 1
                if processed_child_count[predecessor] == out_degree[predecessor]:
                    # All children done: the parent is ready for the next round.
                    next_nodes.append(predecessor)
        nodes_to_process = next_nodes

G=nx.DiGraph()
G.add_nodes_from(['termsequence', 'maximumdegree', 'emptymultigraph', 'minimum', 'multiset', 'walk', 'nonemptymultigraph', 'euleriantrail', 'nonnullmultigraph', 'cycle', 'loop', 'abwalk', 'endvertices', 'simplegraph', 'vertex', 'multipletrails', 'edge', 'set', 'stroll', 'union', 'trailcondition', 'nullmultigraph', 'trivialmultigraph', 'sequence', 'multiplepaths', 'path', 'degreevertex', 'onedgesonvertices', 'nontrivialmultigraph', 'adjacentedges', 'adjacentvertices', 'simpleedge', 'maximum', 'multipleloops', 'length', 'circuit', 'class', 'euleriangraph', 'incident', 'minimumdegree', 'orderedpair', 'unique', 'closedwalk', 'multipleedges', 'pathcondition', 'multigraph', 'trail'])
G.add_edges_from([('termsequence', 'endvertices'), ('emptymultigraph', 'nonemptymultigraph'), ('minimum', 'minimumdegree'), ('multiset', 'trailcondition'), ('multiset', 'pathcondition'), ('multiset', 'multigraph'), ('walk', 'length'), ('walk', 'closedwalk'), ('walk', 'abwalk'), ('walk', 'trail'), ('walk', 'endvertices'), ('euleriantrail', 'euleriangraph'), ('loop', 'simplegraph'), ('loop', 'degreevertex'), ('loop', 'simpleedge'), ('loop', 'multipleloops'), ('endvertices', 'abwalk'), ('vertex', 'adjacentvertices'), ('vertex', 'onedgesonvertices'), ('vertex', 'walk'), ('vertex', 'adjacentedges'), ('vertex', 'multipleedges'), ('vertex', 'edge'), ('vertex', 'multipleloops'), ('vertex', 'degreevertex'), ('vertex', 'incident'), ('edge', 'adjacentvertices'), ('edge', 'onedgesonvertices'), ('edge', 'multipleedges'), ('edge', 'simpleedge'), ('edge', 'adjacentedges'), ('edge', 'loop'), ('edge', 'trailcondition'), ('edge', 'pathcondition'), ('edge', 'walk'), ('edge', 'incident'), ('set', 'onedgesonvertices'), ('set', 'edge'), ('union', 'multiplepaths'), ('union', 'multipletrails'), ('trailcondition', 'trail'), ('nullmultigraph', 'nonnullmultigraph'), ('sequence', 'walk'), ('sequence', 'endvertices'), ('path', 'cycle'), ('path', 'multiplepaths'), ('degreevertex', 'maximumdegree'), ('degreevertex', 'minimumdegree'), ('onedgesonvertices', 'multigraph'), ('maximum', 'maximumdegree'), ('circuit', 'euleriangraph'), ('class', 'multiplepaths'), ('class', 'multipletrails'), ('incident', 'adjacentedges'), ('incident', 'degreevertex'), ('incident', 'onedgesonvertices'), ('orderedpair', 'multigraph'), ('closedwalk', 'circuit'), ('closedwalk', 'cycle'), ('closedwalk', 'stroll'), ('pathcondition', 'path'), ('multigraph', 'euleriangraph'), ('multigraph', 'nullmultigraph'), ('multigraph', 'trivialmultigraph'), ('multigraph', 'nontrivialmultigraph'), ('multigraph', 'emptymultigraph'), ('multigraph', 'euleriantrail'), ('multigraph', 'simplegraph'), ('trail', 'path'), ('trail', 'circuit'), 
('trail', 'multipletrails')])

print G.size()
>71
print G.order()
>47
descendants = {}  #for testing below
for node in G.nodes():
    descendants[node] = nx.descendants(G,node)

remove_redundant_edges(G)  #this removes the edges

print G.size()  #lots of edges gone
>56
print G.order() #no nodes changed.
>47
newdescendants = {}  #for comparison with above
for node in G.nodes():
    newdescendants[node] = nx.descendants(G,node)

# Sanity checks after pruning: reachability must be unchanged, and no
# remaining edge may still have an alternate path between its endpoints.
# print(...) with a single string argument prints the same text on
# Python 2 and Python 3, unlike the bare print statement.
for node in G.nodes():
    if descendants[node] != newdescendants[node]:
        print('descendants changed!!')   #all nodes have the same descendants
    for child in G.neighbors(node):
        if len(list(nx.all_simple_paths(G,node, child)))>1:
            print('bad edge')  #no alternate path exists from a node to its child.

def remove_redundant_edges(G):
    """
    Remove redundant edges from a DAG using networkx (nx).
    An edge is redundant if there is an alternate path
    from its start node to its destination node.

    This algorithm could work front to back, or back to front.
    We choose to work front to back.

    The main persistent variable (in addition to the graph
    itself) is indirect_pred_dict, which is a dictionary with
    one entry per graph node.  Each entry is a set of indirect
    predecessors of this node.

    The algorithmic complexity of the code on a worst-case
    fully-connected graph is O(V**3), where V is the number
    of nodes.

    The graph is modified in place; nothing is returned.
    """
    # Local import so the function does not depend on the module having
    # been imported under this exact name elsewhere.
    import collections

    indirect_pred_dict = collections.defaultdict(set)
    # Materialise the ordering up front: edges are removed inside the loop.
    for node in list(nx.topological_sort(G)):
        indirect_pred = indirect_pred_dict[node]
        # Snapshot the direct predecessors: the list is walked while edges
        # into `node` are removed, and then reused below.  On NetworkX >= 2.0
        # predecessors() is a one-shot iterator over live graph data.
        direct_pred = list(G.predecessors(node))
        for pred in direct_pred:
            if pred in indirect_pred:
                # pred already reaches node through another predecessor.
                G.remove_edge(pred, node)
        # From the successors' point of view, every predecessor of node
        # (direct or not) is an indirect predecessor.
        indirect_pred.update(direct_pred)
        for succ in G.successors(node):
            indirect_pred_dict[succ] |= indirect_pred

最小连通图

O（V+E）

O（V*E）

对于最大连通图（这是最坏情况，即每个节点都连接到它的所有下游节点），复杂度也是 O(V**3)。在这种情况下，操作数构成四面体数序列 n*(n+1)*(n+2)/6，其中 n 是节点数 V 减去 3。
根据图形的形状，可以进行其他优化。以下是一个版本，其中包含一些不同的优化器，可以显著降低某些类型图的复杂性和运行时间：

def remove_redundant_edges(G, optimize_dense=True, optimize_chains=True,
                           optimize_tree=False, optimize_funnel=False):
    """
    Remove redundant edges from a DAG using networkx (nx).
    An edge is redundant if there is an alternate path
    from its start node to its destination node.

    This algorithm could work equally well front to back,
    or back to front.  We choose to work front to back.

    The main persistent variable (in addition to the graph
    itself) is indirect_pred_dict, which is a dictionary with
    one entry per graph node.  Each entry is a set of indirect
    predecessors of this node.

    The main processing algorithm uses this dictionary to
    iteratively calculate indirect predecessors and direct
    predecessors for every node, and prune the direct
    predecessors edges if they are also accessible indirectly.
    The algorithmic complexity is O(V**3), where V is the
    number of nodes in the graph.

    There are also several graph shape-specific optimizations
    provided.  These optimizations could actually increase
    run-times, especially for small graphs that are not amenable
    to the optimizations, so if your execution time is slow,
    you should test different optimization combinations.

    But for the right graph shape, these optimizations can
    provide dramatic improvements.  For the fully connected
    graph (which is worst-case), optimize_dense reduces the
    algorithmic complexity from O(V**3) to O(V**2).

    For a completely linear graph, any of the optimize_tree,
    optimize_chains, or optimize_funnel options would decrease
    complexity from O(V**2) to O(V).

    If the optimize_dense option is set to True, then an
    optimization phase is run before the main algorithm.  This
    optimization phase works by looking for matches between
    each node's successors and that same node's successor's
    successors (by only looking one level ahead at a time).

    If the optimize_tree option is set true, then a phase is
    run that will optimize trees by working right-to-left and
    recursively removing leaf nodes with a single predecessor.
    This will also optimize linear graphs, which are degenerate
    trees.

    If the optimize_funnel option is set true, then funnels
    (inverted trees) will be optimized.

    If the optimize_chains option is set true, then chains
    (linear sections) will be optimized by sharing the
    indirect_pred_dict sets.  This works because Python
    checks to see if two sets are the same instance before
    combining them.

    For a completely linear graph, optimize_funnel or optimize_tree
    execute more quickly than optimize_chains.  Nonetheless,
    optimize_chains option is enabled by default, because
    it is a balanced algorithm that works in more cases than
    the other two.

    The graph is modified in place; nothing is returned.
    """
    # `ordered` is traversed several times (and reversed) below, so it must
    # be a real list; on NetworkX >= 2.0 topological_sort() is a generator.
    ordered = list(nx.topological_sort(G))

    if optimize_dense:
        # Successor sets as they were before any edge is removed.
        succs = dict((node, set(G.successors(node))) for node in ordered)
        for node in ordered:
            my_succs = succs.pop(node)
            kill = set()
            while my_succs:
                succ = my_succs.pop()
                if succ not in kill:
                    check = succs[succ]
                    # Any other successor of node that succ also points at
                    # is reachable through succ, so node's edge to it is
                    # redundant.
                    kill.update(x for x in my_succs if x in check)
            for succ in kill:
                G.remove_edge(node, succ)

    indirect_pred_dict = dict((node, set()) for node in ordered)

    if optimize_tree:
        # Walk back to front, dropping nodes with exactly one incoming edge
        # whose successors have all been dropped already.
        remaining_nodes = set(ordered)
        for node in reversed(ordered):
            if G.in_degree(node) == 1:
                if not (set(G.successors(node)) & remaining_nodes):
                    remaining_nodes.remove(node)
        ordered = [node for node in ordered if node in remaining_nodes]

    if optimize_funnel:
        # Mirror image of the tree pass: walk front to back, dropping nodes
        # with exactly one outgoing edge whose predecessors are all dropped.
        remaining_nodes = set(ordered)
        for node in ordered:
            if G.out_degree(node) == 1:
                if not (set(G.predecessors(node)) & remaining_nodes):
                    remaining_nodes.remove(node)
        ordered = [node for node in ordered if node in remaining_nodes]

    if optimize_chains:
        # This relies on Python optimizing the set |= operation
        # by seeing if the objects are identical.
        for node in ordered:
            # Lists are needed for len() and indexing; on NetworkX >= 2.0
            # successors()/predecessors() return iterators.
            succs = list(G.successors(node))
            if len(succs) == 1 and len(list(G.predecessors(succs[0]))) == 1:
                indirect_pred_dict[succs[0]] = indirect_pred_dict[node]

    for node in ordered:
        indirect_pred = indirect_pred_dict.pop(node)
        # Snapshot: iterated while edges into node are removed, then reused.
        direct_pred = list(G.predecessors(node))
        for pred in direct_pred:
            if pred in indirect_pred:
                G.remove_edge(pred, node)
        indirect_pred.update(direct_pred)
        for succ in G.successors(node):
            indirect_pred_dict[succ] |= indirect_pred
我尚未分析：在启用 optimize_dense 选项的情况下，是否能构造出一个稠密但并非最大连通、且复杂度高于 O(V**2) 的图；不过我也没有先验的理由认为这不可能。该优化对最大连通图效果最好；而在某些情况下它不起任何作用，例如每个节点与其孙节点（而不是其子节点）共享后继节点时——这种情况的运行时间我也没有分析过。

示例测试台：我精简了基本算法的代码，加入了记录最坏情况路径所需操作数的统计代码，并附上了一个生成最大连通图的示例测试生成器。

import collections

import networkx as nx


def makegraph(numnodes):
    """
    Make a fully-connected graph given a number of nodes
    """
    # Every pair (i, j) with i < j becomes an edge i -> j.
    edges = [(i, j) for i in range(numnodes) for j in range(i + 1, numnodes)]
    return nx.DiGraph(edges)


def remove_redundant_edges(G):
    """Prune redundant edges from DAG ``G`` in place and count the work done.

    Same base algorithm as the un-instrumented version: nodes are visited in
    topological order and an edge pred -> node is removed when pred is
    already an indirect predecessor of node.

    Returns the accumulated size of the predecessor sets merged into
    successors, used as the operation count.
    """
    ops = 0
    indirect_pred_dict = collections.defaultdict(set)
    # Materialise the ordering up front: edges are removed inside the loop.
    for node in list(nx.topological_sort(G)):
        indirect_pred = indirect_pred_dict[node]
        # Snapshot: iterated while edges into node are removed, then reused.
        direct_pred = list(G.predecessors(node))
        for pred in direct_pred:
            if pred in indirect_pred:
                G.remove_edge(pred, node)
        indirect_pred.update(direct_pred)
        for succ in G.successors(node):
            indirect_pred_dict[succ] |= indirect_pred
            # Charge one op per element merged into this successor's set.
            ops += len(indirect_pred)
    return ops


def test_1(f, numnodes):
    """Run pruning function ``f`` on a fully-connected graph of ``numnodes``.

    Returns a tuple (ops reported by f, edge count before, edge count after).
    """
    G = makegraph(numnodes)
    e1 = nx.number_of_edges(G)
    ops = f(G)
    e2 = nx.number_of_edges(G)
    return ops, e1, e2


for numnodes in range(30):
    a = test_1(remove_redundant_edges, numnodes)
    # Formatted through a single string so the output ("<nodes> <ops>") is
    # the same on Python 2 and Python 3.
    print("%s %s" % (numnodes, a[0]))

你确定这不是 NP 难问题吗？换句话说，你知道有什么算法可以在多项式时间内解决这个问题吗？ @Joel 我编辑了这个问题。这是一个DAG（没有有向环）。 @Sait 最坏情况下的运行时间，是边数乘以在一对节点之间查找有向路径的运行时间（后者具体是多少我不知道，正想弄清楚！） 注意——我的代码有一个愚蠢的减速：我一直在重新计算 G.out_degree(node)。我已经修改了代码，改为在开始时创建一个 dict，这带来了一个数量级的改进。 我添加了一些代码来提供可选的优化。例如，对于最大连接的DAG，复杂性从
O（V**3）
降低到
O（V**2）
，因此对于4000节点的完全连接图，运行时间从大约90秒（使用Joel的代码或我的基本算法）减少到大约5秒。你知道这些tw的运行时间吗