Python:创建“最小连接”的有向无环图

  • NetworkX是否有内置函数来执行此操作
  • 我真的不想重新发明轮子

  • [可选]只有在问题1的答案为“否”的情况下:实现这一点的最有效算法是什么(针对相当稠密的图)?
  • 以下是需要清理(去除冗余边)的DAG示例:

    • 这些节点是:

      ['termsequence', 'maximumdegree', 'emptymultigraph', 'minimum', 'multiset', 'walk', 'nonemptymultigraph', 'euleriantrail', 'nonnullmultigraph', 'cycle', 'loop', 'abwalk', 'endvertices', 'simplegraph', 'vertex', 'multipletrails', 'edge', 'set', 'stroll', 'union', 'trailcondition', 'nullmultigraph', 'trivialmultigraph', 'sequence', 'multiplepaths', 'path', 'degreevertex', 'onedgesonvertices', 'nontrivialmultigraph', 'adjacentedges', 'adjacentvertices', 'simpleedge', 'maximum', 'multipleloops', 'length', 'circuit', 'class', 'euleriangraph', 'incident', 'minimumdegree', 'orderedpair', 'unique', 'closedwalk', 'multipleedges', 'pathcondition', 'multigraph', 'trail']
    • 边是:

      [('termsequence', 'endvertices'), ('emptymultigraph', 'nonemptymultigraph'), ('minimum', 'minimumdegree'), ('multiset', 'trailcondition'), ('multiset', 'pathcondition'), ('multiset', 'multigraph'), ('walk', 'length'), ('walk', 'closedwalk'), ('walk', 'abwalk'), ('walk', 'trail'), ('walk', 'endvertices'), ('euleriantrail', 'euleriangraph'), ('loop', 'simplegraph'), ('loop', 'degreevertex'), ('loop', 'simpleedge'), ('loop', 'multipleloops'), ('endvertices', 'abwalk'), ('vertex', 'adjacentvertices'), ('vertex', 'onedgesonvertices'), ('vertex', 'walk'), ('vertex', 'adjacentedges'), ('vertex', 'multipleedges'), ('vertex', 'edge'), ('vertex', 'multipleloops'), ('vertex', 'degreevertex'), ('vertex', 'incident'), ('edge', 'adjacentvertices'), ('edge', 'onedgesonvertices'), ('edge', 'multipleedges'), ('edge', 'simpleedge'), ('edge', 'adjacentedges'), ('edge', 'loop'), ('edge', 'trailcondition'), ('edge', 'pathcondition'), ('edge', 'walk'), ('edge', 'incident'), ('set', 'onedgesonvertices'), ('set', 'edge'), ('union', 'multiplepaths'), ('union', 'multipletrails'), ('trailcondition', 'trail'), ('nullmultigraph', 'nonnullmultigraph'), ('sequence', 'walk'), ('sequence', 'endvertices'), ('path', 'cycle'), ('path', 'multiplepaths'), ('degreevertex', 'maximumdegree'), ('degreevertex', 'minimumdegree'), ('onedgesonvertices', 'multigraph'), ('maximum', 'maximumdegree'), ('circuit', 'euleriangraph'), ('class', 'multiplepaths'), ('class', 'multipletrails'), ('incident', 'adjacentedges'), ('incident', 'degreevertex'), ('incident', 'onedgesonvertices'), ('orderedpair', 'multigraph'), ('closedwalk', 'circuit'), ('closedwalk', 'cycle'), ('closedwalk', 'stroll'), ('pathcondition', 'path'), ('multigraph', 'euleriangraph'), ('multigraph', 'nullmultigraph'), ('multigraph', 'trivialmultigraph'), ('multigraph', 'nontrivialmultigraph'), ('multigraph', 'emptymultigraph'), ('multigraph', 'euleriantrail'), ('multigraph', 'simplegraph'), ('trail', 'path'), ('trail', 'circuit'), ('trail', 
'multipletrails')]
    • 是的



      def multiple_paths(G,source,target):
          '''Report whether more than one simple path joins source to target.

          Returns True as soon as ``nx.all_simple_paths`` yields a second
          simple path from ``source`` to ``target``; returns False when it
          yields no path or exactly one.
          '''
          simple_paths = iter(nx.all_simple_paths(G, source=source, target=target))
          exhausted = object()  # sentinel: the path iterator ran dry
          # Pull at most two paths; the remaining ones never need enumerating.
          if next(simple_paths, exhausted) is exhausted:
              return False
          return next(simple_paths, exhausted) is not exhausted
      import networkx as nx
      # Small DAG in which (0,3) and (1,3) are each bypassed by a longer path.
      G = nx.DiGraph()
      G.add_edges_from([(0,1), (1,2), (1,3), (0,3), (2,3)])
      print(multiple_paths(G, 0, 1))
      # > False
      print(multiple_paths(G, 1, 2))
      # > False
      print(multiple_paths(G, 1, 3))
      # > True
      # Iterate over a snapshot of the edges because edges are removed inside the loop.
      for edge in list(G.edges()):  #let's do what you're trying to do
          if multiple_paths(G, edge[0], edge[1]):
              # A second path exists, so the direct edge is redundant.
              G.remove_edge(edge[0], edge[1])
      print(list(G.edges()))
      # > [(0, 1), (1, 2), (2, 3)]


      import networkx as nx
      G = nx.DiGraph()
      G.add_edges_from([(0,1), (1,2), (1,3), (0,3), (2,3)])
      # Iterate over a snapshot of the edges because the graph is mutated inside the loop.
      for edge in list(G.edges()):
          # Take the edge out; if its endpoints are then disconnected the
          # edge was essential, so put it back. Otherwise it stays removed.
          G.remove_edge(edge[0], edge[1])
          if not nx.has_path(G,edge[0],edge[1]):
              G.add_edge(edge[0], edge[1])
      print(list(G.edges()))
      # > [(0, 1), (1, 2), (2, 3)]






      import networkx as nx
      from collections import defaultdict
      def remove_redundant_edges(G):
          '''Remove, in place, every edge of the DAG ``G`` that a longer path makes redundant.

          Nodes are processed from the sinks upward: a node is handled only
          after all of its children have been handled, so by then its set of
          indirect descendants is complete. An edge ``node -> child`` is
          deleted when ``child`` is already an indirect descendant of ``node``.

          G: a directed acyclic graph exposing the NetworkX DiGraph methods
             used below. It is modified in place; nothing is returned.
          '''
          processed_child_count = defaultdict(int)  #when all of a nodes children are processed, we'll add it to nodes_to_process
          descendants = defaultdict(set)            #all descendants of a node (including children)
          # Out-degrees are recorded before any edge is removed so that the
          # "all children processed" test below counts redundant edges too.
          out_degree = {node: G.out_degree(node) for node in G.nodes()}
          nodes_to_process = [node for node in G.nodes() if out_degree[node] == 0]  #initially it's all nodes without children
          while nodes_to_process:
              next_nodes = []
              for node in nodes_to_process:
                  # When we enter this loop, the descendants of a node are
                  # known, except for direct children.
                  # Snapshot the children: edges are deleted while looping.
                  for child in list(G.neighbors(node)):
                      if child in descendants[node]:  #if the child is already an indirect descendant, delete the edge
                          G.remove_edge(node, child)
                      else:                           #otherwise add it to the descendants
                          descendants[node].add(child)
                  for predecessor in G.predecessors(node):             #update all parents' indirect descendants
                      descendants[predecessor].update(descendants[node])
                      processed_child_count[predecessor] += 1          #we have processed one more child of this parent
                      if processed_child_count[predecessor] == out_degree[predecessor]:  #if all children processed, add to list for next iteration.
                          next_nodes.append(predecessor)
              # Advance to the next layer; without this the loop never ends.
              nodes_to_process = next_nodes

      G.add_nodes_from(['termsequence', 'maximumdegree', 'emptymultigraph', 'minimum', 'multiset', 'walk', 'nonemptymultigraph', 'euleriantrail', 'nonnullmultigraph', 'cycle', 'loop', 'abwalk', 'endvertices', 'simplegraph', 'vertex', 'multipletrails', 'edge', 'set', 'stroll', 'union', 'trailcondition', 'nullmultigraph', 'trivialmultigraph', 'sequence', 'multiplepaths', 'path', 'degreevertex', 'onedgesonvertices', 'nontrivialmultigraph', 'adjacentedges', 'adjacentvertices', 'simpleedge', 'maximum', 'multipleloops', 'length', 'circuit', 'class', 'euleriangraph', 'incident', 'minimumdegree', 'orderedpair', 'unique', 'closedwalk', 'multipleedges', 'pathcondition', 'multigraph', 'trail'])
      G.add_edges_from([('termsequence', 'endvertices'), ('emptymultigraph', 'nonemptymultigraph'), ('minimum', 'minimumdegree'), ('multiset', 'trailcondition'), ('multiset', 'pathcondition'), ('multiset', 'multigraph'), ('walk', 'length'), ('walk', 'closedwalk'), ('walk', 'abwalk'), ('walk', 'trail'), ('walk', 'endvertices'), ('euleriantrail', 'euleriangraph'), ('loop', 'simplegraph'), ('loop', 'degreevertex'), ('loop', 'simpleedge'), ('loop', 'multipleloops'), ('endvertices', 'abwalk'), ('vertex', 'adjacentvertices'), ('vertex', 'onedgesonvertices'), ('vertex', 'walk'), ('vertex', 'adjacentedges'), ('vertex', 'multipleedges'), ('vertex', 'edge'), ('vertex', 'multipleloops'), ('vertex', 'degreevertex'), ('vertex', 'incident'), ('edge', 'adjacentvertices'), ('edge', 'onedgesonvertices'), ('edge', 'multipleedges'), ('edge', 'simpleedge'), ('edge', 'adjacentedges'), ('edge', 'loop'), ('edge', 'trailcondition'), ('edge', 'pathcondition'), ('edge', 'walk'), ('edge', 'incident'), ('set', 'onedgesonvertices'), ('set', 'edge'), ('union', 'multiplepaths'), ('union', 'multipletrails'), ('trailcondition', 'trail'), ('nullmultigraph', 'nonnullmultigraph'), ('sequence', 'walk'), ('sequence', 'endvertices'), ('path', 'cycle'), ('path', 'multiplepaths'), ('degreevertex', 'maximumdegree'), ('degreevertex', 'minimumdegree'), ('onedgesonvertices', 'multigraph'), ('maximum', 'maximumdegree'), ('circuit', 'euleriangraph'), ('class', 'multiplepaths'), ('class', 'multipletrails'), ('incident', 'adjacentedges'), ('incident', 'degreevertex'), ('incident', 'onedgesonvertices'), ('orderedpair', 'multigraph'), ('closedwalk', 'circuit'), ('closedwalk', 'cycle'), ('closedwalk', 'stroll'), ('pathcondition', 'path'), ('multigraph', 'euleriangraph'), ('multigraph', 'nullmultigraph'), ('multigraph', 'trivialmultigraph'), ('multigraph', 'nontrivialmultigraph'), ('multigraph', 'emptymultigraph'), ('multigraph', 'euleriantrail'), ('multigraph', 'simplegraph'), ('trail', 'path'), ('trail', 
'circuit'), ('trail', 'multipletrails')])
      # print(x) with a single argument behaves the same on Python 2 and 3.
      print(G.size())
      print(G.order())
      descendants = {}  #for testing below
      for node in G.nodes():
          descendants[node] = nx.descendants(G,node)
      remove_redundant_edges(G)  #this removes the edges
      print(G.size())  #lots of edges gone
      print(G.order()) #no nodes changed.
      newdescendants = {}  #for comparison with above
      for node in G.nodes():
          newdescendants[node] = nx.descendants(G,node)
      for node in G.nodes():
          # Reachability must be unchanged by the pruning.
          if descendants[node] != newdescendants[node]:
              print('descendants changed!!')   #all nodes have the same descendants
          # Every surviving edge must be the only path between its endpoints.
          for child in G.neighbors(node):
              if len(list(nx.all_simple_paths(G,node, child)))>1:
                  print('bad edge')  #no alternate path exists from a node to its child.



      def remove_redundant_edges(G):
          """
          Remove redundant edges from a DAG using networkx (nx).
          An edge is redundant if there is an alternate path
          from its start node to its destination node.

          This algorithm could work front to back, or back to front.
          We choose to work front to back.

          The main persistent variable (in addition to the graph
          itself) is indirect_pred_dict, which is a dictionary with
          one entry per graph node.  Each entry is a set of indirect
          predecessors of this node.

          The algorithmic complexity of the code on a worst-case
          fully-connected graph is O(V**3), where V is the number
          of nodes.

          G is modified in place; nothing is returned.
          """
          indirect_pred_dict = collections.defaultdict(set)
          # Materialize the order first: edges are removed while we walk it.
          for node in list(nx.topological_sort(G)):
              indirect_pred = indirect_pred_dict[node]
              # Snapshot the predecessors for the same reason.
              direct_pred = list(G.predecessors(node))
              for pred in direct_pred:
                  if pred in indirect_pred:
                      G.remove_edge(pred, node)
              # From the successors' point of view every predecessor of this
              # node, direct or not, is an indirect predecessor. Without this
              # step the sets stay empty and nothing is ever removed.
              indirect_pred.update(direct_pred)
              for succ in G.successors(node):
                  indirect_pred_dict[succ] |= indirect_pred
      复杂性分析与大O优化:对于最小连通图(其中每个节点仅连接到一条边),复杂性为 O(V+E)。


      def remove_redundant_edges(G, optimize_dense=True, optimize_chains=True,
                                    optimize_tree=False,  optimize_funnel=False):
          """
          Remove redundant edges from a DAG using networkx (nx).
          An edge is redundant if there is an alternate path
          from its start node to its destination node.

          This algorithm could work equally well front to back,
          or back to front. We choose to work front to back.

          The main persistent variable (in addition to the graph
          itself) is indirect_pred_dict, which is a dictionary with
          one entry per graph node.  Each entry is a set of indirect
          predecessors of this node.

          The main processing algorithm uses this dictionary to
          iteratively calculate indirect predecessors and direct
          predecessors for every node, and prune the direct
          predecessors edges if they are also accessible indirectly.
          The algorithmic complexity is O(V**3), where V is the
          number of nodes in the graph.

          There are also several graph shape-specific optimizations
          provided.  These optimizations could actually increase
          run-times, especially for small graphs that are not amenable
          to the optimizations, so if your execution time is slow,
          you should test different optimization combinations.

          But for the right graph shape, these optimizations can
          provide dramatic improvements.  For the fully connected
          graph (which is worst-case), optimize_dense reduces the
          algorithmic complexity from O(V**3) to O(V**2).

          For a completely linear graph, any of the optimize_tree,
          optimize_chains, or optimize_funnel options would decrease
          complexity from O(V**2) to O(V).

          If the optimize_dense option is set to True, then an
          optimization phase is run before the main algorithm.  This
          optimization phase works by looking for matches between
          each node's successors and that same node's successor's
          successors (by only looking one level ahead at a time).

          If the optimize_tree option is set true, then a phase is
          run that will optimize trees by working right-to-left and
          recursively removing leaf nodes with a single predecessor.
          This will also optimize linear graphs, which are degenerate
          trees.

          If the optimize_funnel option is set true, then funnels
          (inverted trees) will be optimized.

          If the optimize_chains option is set true, then chains
          (linear sections) will be optimized by sharing the
          indirect_pred_dict sets.  This works because Python
          checks to see if two sets are the same instance before
          combining them.

          For a completely linear graph, optimize_funnel or optimize_tree
          execute more quickly than optimize_chains.  Nonetheless,
          optimize_chains option is enabled by default, because
          it is a balanced algorithm that works in more cases than
          the other two.

          G is modified in place; nothing is returned.
          """
          # A concrete list is required: the order is walked several times,
          # reversed, and the graph is mutated while it is being walked.
          ordered = list(nx.topological_sort(G))

          if optimize_dense:
              succs = dict((node, set(G.successors(node))) for node in ordered)
              for node in ordered:
                  my_succs = succs.pop(node)
                  kill = set()
                  while my_succs:
                      succ = my_succs.pop()
                      if succ not in kill:
                          # Any other successor of node that succ also points
                          # at is reachable via succ, so node's direct edge
                          # to it is redundant.
                          check = succs[succ]
                          kill.update(x for x in my_succs if x in check)
                  for succ in kill:
                      G.remove_edge(node, succ)

          # Built before any pruning of `ordered` so every node keeps an entry.
          indirect_pred_dict = dict((node, set()) for node in ordered)

          if optimize_tree:
              remaining_nodes = set(ordered)
              for node in reversed(ordered):
                  if G.in_degree(node) == 1:
                      if not (set(G.successors(node)) & remaining_nodes):
                          # Leaf with a single incoming edge: that edge cannot
                          # be redundant, so the node needs no processing.
                          remaining_nodes.remove(node)
              ordered = [node for node in ordered if node in remaining_nodes]

          if optimize_funnel:
              remaining_nodes = set(ordered)
              for node in ordered:
                  if G.out_degree(node) == 1:
                      if not (set(G.predecessors(node)) & remaining_nodes):
                          # Source-side node with a single outgoing edge:
                          # that edge cannot be redundant either.
                          remaining_nodes.remove(node)
              ordered = [node for node in ordered if node in remaining_nodes]

          if optimize_chains:
              # This relies on Python optimizing the set |= operation
              # by seeing if the objects are identical.
              for node in ordered:
                  succs = list(G.successors(node))
                  if len(succs) == 1 and len(list(G.predecessors(succs[0]))) == 1:
                      indirect_pred_dict[succs[0]] = indirect_pred_dict[node]

          for node in ordered:
              indirect_pred = indirect_pred_dict.pop(node)
              # Snapshot: edges into this node are removed inside the loop.
              direct_pred = list(G.predecessors(node))
              for pred in direct_pred:
                  if pred in indirect_pred:
                      G.remove_edge(pred, node)
              # Every predecessor of this node is an indirect predecessor of
              # its successors; without this the sets would stay empty.
              indirect_pred.update(direct_pred)
              for succ in G.successors(node):
                  indirect_pred_dict[succ] |= indirect_pred

      示例测试台:我已经精简了基本算法的代码,添加了用于记录最坏情况下所需操作数的统计代码,以及一个生成最大连接图的示例测试生成器。

      import collections
      import networkx as nx
      def makegraph(numnodes):
          """
          Make a fully-connected graph given a number of nodes.

          Nodes are the integers 0 .. numnodes-1 and there is an edge
          i -> j for every pair with i < j, so the result is the densest
          possible DAG on that many nodes.  Returns an nx.DiGraph.
          """
          graph = nx.DiGraph()
          # Add the nodes explicitly so that numnodes == 1 still yields a
          # one-node graph even though it has no edges.
          graph.add_nodes_from(range(numnodes))
          graph.add_edges_from(
              (i, j) for i in range(numnodes) for j in range(i + 1, numnodes)
          )
          return graph
      def remove_redundant_edges(G):
          """
          Remove redundant edges from the DAG G in place and count the work done.

          Returns ops, the total size of the indirect-predecessor sets that
          were merged into successors' sets, as a measure of the operations
          performed.
          """
          ops = 0
          indirect_pred_dict = collections.defaultdict(set)
          # Materialize the order first: edges are removed while we walk it.
          for node in list(nx.topological_sort(G)):
              indirect_pred = indirect_pred_dict[node]
              # Snapshot: edges into this node are removed inside the loop.
              direct_pred = list(G.predecessors(node))
              for pred in direct_pred:
                  if pred in indirect_pred:
                      G.remove_edge(pred, node)
              # Every predecessor of this node is an indirect predecessor of
              # its successors; without this the sets stay empty, nothing is
              # removed and ops is always 0.
              indirect_pred.update(direct_pred)
              for succ in G.successors(node):
                  indirect_pred_dict[succ] |= indirect_pred
                  ops += len(indirect_pred)
          return ops
      def test_1(f, numnodes):
          """
          Run the pruning function f on a fresh graph from makegraph(numnodes).

          Returns a tuple (ops, edges_before, edges_after), where ops is
          whatever f returned and the other two are the graph's edge counts
          before and after the call.
          """
          graph = makegraph(numnodes)
          edges_before = nx.number_of_edges(graph)
          op_count = f(graph)  # f prunes the graph in place
          edges_after = nx.number_of_edges(graph)
          result = (op_count, edges_before, edges_after)
          return result
      # Report the operation count for fully-connected graphs of 0..29 nodes.
      for numnodes in range(30):
          a = test_1(remove_redundant_edges, numnodes)
          # A single formatted argument prints "n ops" identically on
          # Python 2 and Python 3.
          print("%s %s" % (numnodes, a[0]))
