Algorithm 联合查找不相交集的朴素函数实现性能较差_Algorithm_Haskell_Data Structures_Profiling_Disjoint Sets

Algorithm 联合查找不相交集的朴素函数实现性能较差

algorithm haskell data-structures

Algorithm 联合查找不相交集的朴素函数实现性能较差,algorithm,haskell,data-structures,profiling,disjoint-sets,Algorithm,Haskell,Data Structures,Profiling,Disjoint Sets,以下UFDS实现的性能较差。有人能告诉我为什么会这样吗？以下是分析报告： total time = 0.10 secs (98 ticks @ 1000 us, 1 processor) total alloc = 78,869,168 bytes (excludes profiling overheads) COST CENTRE MODULE SRC %

以下UFDS实现的性能较差。有人能告诉我为什么会这样吗？以下是分析报告：

    total time  =        0.10 secs   (98 ticks @ 1000 us, 1 processor)
    total alloc =  78,869,168 bytes  (excludes profiling overheads)

COST CENTRE        MODULE    SRC                                        %time %alloc

x.\                Main      src/merging_communities.hs:67:54-71         37.8    0.0
foldMap            Main      src/merging_communities.hs:(31,3)-(32,55)   22.4    0.0
x                  Main      src/merging_communities.hs:(65,1)-(68,79)   20.4   83.2
getElemTree        Main      src/merging_communities.hs:40:1-43          19.4    0.0
main.initialForest Main      src/merging_communities.hs:103:7-51          0.0   16.2

main.hs

您的

getElemTree

需要线性时间。通常，整个想法是可以检查两棵树在树的深度是否相同。对，但如果我们添加线性时间运算，它们不会真正影响性能，对吗？在

unWeightedUnion

中检查树的相等性是不必要的昂贵-如果您发现树的索引包含

和

，那么你所需要做的就是一个整数比较。这不是一个在纯/懒惰环境中没有同等复杂度解决方案的问题吗？@Carl上次我听说（承认是几年前的事了），现在还不知道是否有一个懒惰的纯函数解决方案的复杂度和（fast）一样好迫切的解决办法。它相信已经证明在严格的纯功能环境中不存在这样的解决方案，但懒惰增加了足够多的（“隐藏”）变异，这可能是可能的（但是，据我所知，目前没有人确定）。这里有一个简短的参考（从2013年开始）。

   module Main where

import Control.Monad
import Control.Monad.State.Lazy
import Data.Foldable
import Data.Functor
import Data.List
import Data.Maybe
import Data.Monoid
import Prelude
import System.IO
import Text.Pretty.Simple

--import Text.Pretty.Simple (pPrint)
--The Union-Find algorithm and Disjoint Sets (UFDS) data structureare used which is able to efficiently (i.e. in nearly constant time) determine which set an item belongs to, 
--test if two items belong to the same set, and union two disjoint sets into one when needed. 
--It can be used to find connected components in an undirected graph, and can hence be used as part of Kruskal's algorithm for the Minimum Spanning Tree (MST) problem.
data Tree a =
  Node a
       [Tree a]
  deriving (Show)

instance (Eq a) => Eq (Tree a) where
  (Node a forestA) == (Node b forestB) = a == b && forestA == forestB

instance Functor Tree where
  fmap f (Node a []) = Node (f a) []
  fmap f (Node a forest) = Node (f a) (fmap (fmap f) forest)

instance Foldable Tree where
  foldMap f (Node a []) = f a
  foldMap f (Node a xs) = f a <> foldMap (foldMap f) xs

-- each disjoint set has a representative element which is used to uniquely identify the set. We can use a tree to represent a disjoint set where
-- the representative element is the root node of the tree
makeSet :: a -> Tree a
makeSet a = Node a []

getElemTree :: Eq a => a -> [Tree a] -> Maybe (Tree a)
getElemTree a forest = find (elem a) forest

size :: Tree a -> Int
size (Node a []) = 1
size (Node a forest) = 1 + (sum $ fmap size forest)

depth :: Tree a -> Int
depth (Node a []) = 1
depth (Node a forest) = 1 + (maximum $ fmap ((+ 1) . depth) forest)

flatten :: Tree a -> [a]
flatten (Node a forest) = [a] ++ (foldMap flatten forest)

-- set the parent of one of the roots to the other tree's root - which one we choose is based on our weighting
unWeightedUnion :: Eq a => a -> a -> [Tree a] -> [Tree a]
unWeightedUnion a b forest
  | isNothing treeA || treeA == treeB = forest
  | otherwise =
    let tA@(Node rootA forestA) = fromJust $ treeA
        tB@(Node rootB forestB) = fromJust $ treeB
     in changeRoot tA tB forest
  where
    treeA = getElemTree a forest
    treeB = getElemTree b forest

changeRoot tA@(Node rootA forestA) tB@(Node rootB forestB) forest =
  if (size tA <= size tB)
    then (Node rootA (tB : forestA)) : filter (\t -> t /= tB && t /= tA) forest
    else (Node rootB (tA : forestB)) : filter (\t -> t /= tB && t /= tA) forest

-- union by rank is a weighting which keeps our trees as shallow as possible When we weight by rank or tree depth we make the shallower tree root the child of the deeper tree's root
getRoot :: Tree a -> a -- get the root node
getRoot (Node a _) = a

-- return the name of the set containing the node x ie the root node of the set containing node x
-- use path compression - if parent is not the root then set the parent of the node to the root
data Query
  = M Int
      Int
  | Q Int
  deriving (Show, Read)

executeQuery :: [Query] -> Int -> StateT [Tree Int] IO Query
executeQuery [] _ = return $ M 1 1
executeQuery qs pop = do
  forest <- get
  case head qs of
    (M a b) -> do
      let newForest = unWeightedUnion a b forest
      put newForest
      executeQuery (tail qs) pop
    (Q a) -> do
      liftIO $ print $ size $ fromJust $ getElemTree a forest
      executeQuery (tail qs) pop

main = do
  contents <- readFile "queries.txt"
  print $ lines contents
  let population = head $ words contents
  let queries = map read $ tail $ lines contents :: [Query]
  let population = read $ head $ words contents :: Int
  let initialForest = map makeSet [1 .. population]
  execStateT (executeQuery queries population) initialForest

100000 200000
M 68770 97917
M 65906 74478
M 78744 21384
M 36186 31560
Q 43063
M 12923 73331
M 91542 54702
M 62459 96133
M 13196 56121
M 1648 86052
M 99517 97247
M 59768 66017
Q 48274
Q 96430
M 44341 70873
Q 74989
Q 71357
M 72482 16677
Q 8219