How to analyze Julia memory allocation and code coverage results


I am writing a Bayesian inference package that uses Gibbs sampling. Since these methods are generally computationally expensive, I care a great deal about the performance of my code. In fact, speed is the reason I moved from Python to Julia.

After implementing it, I profiled the code using the --code-coverage=user and --track-allocation=user command line options.
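For context, the two files below were produced by runs along these lines (run_dpm.jl is a placeholder for the actual script name; the exact names of the output files vary across Julia versions):

    julia --code-coverage=user run_dpm.jl      # writes per-line execution counts to a *.cov file next to each tracked source file
    julia --track-allocation=user run_dpm.jl   # writes per-line bytes allocated to a *.mem file when the process exits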

Here are the coverage results (the .cov file):

        - #=
        - DPM
        - 
        - Dirichlet Process Mixture Models
        - 
        - 25/08/2015
        - Adham Beyki, odinay@gmail.com
        - 
        - =#
        - 
        - type DPM{T}
        -   bayesian_component::T
        -   K::Int64
        -   aa::Float64
        -   a1::Float64
        -   a2::Float64
        -   K_hist::Vector{Int64}
        -   K_zz_dict::Dict{Int64, Vector{Int64}}
        - 
        -   DPM{T}(c::T, K::Int64, aa::Float64, a1::Float64, a2::Float64) = new(c, K, aa, a1, a2,
        -           Int64[], (Int64 => Vector{Int64})[])
        - end
        1 DPM{T}(c::T, K::Int64, aa::Real, a1::Real, a2::Real) = DPM{typeof(c)}(c, K, convert(Float64, aa),
        -           convert(Float64, a1), convert(Float64, a2))
        - 
        - function Base.show(io::IO, dpm::DPM)
        -   println(io, "Dirichlet Mixture Model with $(dpm.K) $(typeof(dpm.bayesian_component)) components")
        - end
        - 
        - function initialize_gibbs_sampler!(dpm::DPM, zz::Vector{Int64})
        -   # populates the cluster labels randomly
        1   zz[:] = rand(1:dpm.K, length(zz))
        - end
        - 
        - function DPM_sample_hyperparam(aa::Float64, a1::Float64, a2::Float64, K::Int64, NN::Int64, iters::Int64)
        - 
        -   # resampling concentration parameter based on Escobar and West 1995
      352   for n = 1:iters
     3504       eta = rand(Distributions.Beta(aa+1, NN))
     3504       rr = (a1+K-1) / (NN*(a2-log(NN)))
     3504       pi_eta = rr / (1+rr)
        - 
     3504       if rand() < pi_eta
        0           aa = rand(Distributions.Gamma(a1+K, 1/(a2-log(eta))))
        -       else
     3504           aa = rand(Distributions.Gamma(a1+K-1, 1/(a2-log(eta))))
        -       end
        -   end
      352   aa
        - end
        - 
        - function DPM_sample_pp{T1, T2}(
        -     bayesian_components::Vector{T1},
        -     xx::T2,
        -     nn::Vector{Float64},
        -     pp::Vector{Float64},
        -     aa::Float64)
        - 
  1760000   K = length(nn)
  1760000   @inbounds for kk = 1:K
 11384379     pp[kk] = log(nn[kk]) + logpredictive(bayesian_components[kk], xx)
        -   end
  1760000   pp[K+1] = log(aa) + logpredictive(bayesian_components[K+1], xx)
  1760000   normalize_pp!(pp, K+1)
  1760000   return sample(pp[1:K+1])
        - end
        - 
        - 
        - function collapsed_gibbs_sampler!{T1, T2}(
        -       dpm::DPM{T1},
        -       xx::Vector{T2},
        -       zz::Vector{Int64},
        -       n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64; max_clusters::Int64=100)
        - 
        - 
        2   NN = length(xx)                                          # number of data points
        2   nn = zeros(Float64, dpm.K)                       # count array
        2   n_iterations = n_burnins + (n_samples)*(n_lags+1)
        2   bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:dpm.K+1]
        2   dpm.K_hist = zeros(Int64, n_iterations)
        2   pp = zeros(Float64, max_clusters)
        - 
        2   tic()
        2   for ii = 1:NN
    10000       kk = zz[ii]
    10000       additem(bayesian_components[kk], xx[ii])
    10000       nn[kk] += 1
        -   end
        2   dpm.K_hist[1] = dpm.K
        2   elapsed_time = toq()
        - 
        2   for iteration = 1:n_iterations
        - 
      352       println("iteration: $iteration, KK: $(dpm.K), KK mode: $(indmax(hist(dpm.K_hist,
        -                       0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time")
        - 
      352       tic()
      352       @inbounds for ii = 1:NN
  1760000           kk = zz[ii]
  1760000           nn[kk] -= 1
  1760000           delitem(bayesian_components[kk], xx[ii])
        - 
        -           # remove the cluster if empty
  1760000           if nn[kk] == 0
      166               println("\tcomponent $kk has become inactive")
      166               splice!(nn, kk)
      166               splice!(bayesian_components, kk)
      166               dpm.K -= 1
        - 
        -               # shifting the labels one cluster back
   830166               idx = find(x -> x>kk, zz)
      166               zz[idx] -= 1
        -           end
        - 
  1760000           kk = DPM_sample_pp(bayesian_components, xx[ii], nn, pp, dpm.aa)
        - 
  1760000           if kk == dpm.K+1
      171               println("\tcomponent $kk activated.")
      171               push!(bayesian_components, deepcopy(dpm.bayesian_component))
      171               push!(nn, 0)
      171               dpm.K += 1
        -           end
        - 
  1760000           zz[ii] = kk
  1760000           nn[kk] += 1
  1760000           additem(bayesian_components[kk], xx[ii])
        -       end
        - 
      352       dpm.aa = DPM_sample_hyperparam(dpm.aa, dpm.a1, dpm.a2, dpm.K, NN, n_internals)
      352       dpm.K_hist[iteration] = dpm.K
      352       dpm.K_zz_dict[dpm.K] = deepcopy(zz)
      352       elapsed_time = toq()
        -   end
        - end
        - 
        - function truncated_gibbs_sampler{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, zz::Vector{Int64},
        -   n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64, K_truncation::Int64)
        - 
        -   NN = length(xx)                                             # number of data points
        -   nn = zeros(Int64, K_truncation)             # count array
        -   bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:K_truncation]
        -   n_iterations = n_burnins + (n_samples)*(n_lags+1)
        -   dpm.K_hist = zeros(Int64, n_iterations)
        -   states = (ASCIIString => Int64)[]
        -   n_states = 0
        - 
        -   tic()
        -   for ii = 1:NN
        -       kk = zz[ii]
        -       additem(bayesian_components[kk], xx[ii])
        -       nn[kk] += 1
        -   end
        -   dpm.K_hist[1] = dpm.K
        - 
        -   # constructing the sticks
        -   beta_VV = rand(Distributions.Beta(1.0, dpm.aa), K_truncation)
        -   beta_VV[end] = 1.0
        -   π = ones(Float64, K_truncation)
        -   π[2:end] = 1 - beta_VV[1:K_truncation-1]
        -   π = log(beta_VV) + log(cumprod(π))
        - 
        -   elapsed_time = toq()
        - 
        -   for iteration = 1:n_iterations
        - 
        -       println("iteration: $iteration, # active components: $(length(findn(nn)[1])), mode: $(indmax(hist(dpm.K_hist,
        -                       0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time \n", nn)
        - 
        -       tic()
        -       for ii = 1:NN
        -           kk = zz[ii]
        -           nn[kk] -= 1
        -           delitem(bayesian_components[kk], xx[ii])
        - 
        -           # resampling label
        -           pp = zeros(Float64, K_truncation)
        -           for kk = 1:K_truncation
        -               pp[kk] = π[kk] + logpredictive(bayesian_components[kk], xx[ii])
        -           end
        -           pp = exp(pp - maximum(pp))
        -           pp /= sum(pp)
        - 
        -           # sample from pp
        -           kk = sampleindex(pp)
        -           zz[ii] = kk
        -           nn[kk] += 1
        -           additem(bayesian_components[kk], xx[ii])
        - 
        -           for kk = 1:K_truncation-1
        -               gamma1 = 1 + nn[kk]
        -               gamma2 = dpm.aa + sum(nn[kk+1:end])
        -               beta_VV[kk] = rand(Distributions.Beta(gamma1, gamma2))
        -           end
        -           beta_VV[end] = 1.0
        -           π = ones(Float64, K_truncation)
        -           π[2:end] = 1 - beta_VV[1:K_truncation-1]
        -           π = log(beta_VV) + log(cumprod(π))
        - 
        -           # resampling concentration parameter based on Escobar and West 1995
        -           for internal_iters = 1:n_internals
        -                   eta = rand(Distributions.Beta(dpm.aa+1, NN))
        -               rr = (dpm.a1+dpm.K-1) / (NN*(dpm.a2-log(NN)))
        -               pi_eta = rr / (1+rr)
        - 
        -               if rand() < pi_eta
        -                   dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K, 1/(dpm.a2-log(eta))))
        -               else
        -                   dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K-1, 1/(dpm.a2-log(eta))))
        -               end
        -           end
        -       end
        - 
        -       nn_string = nn2string(nn)
        -       if !haskey(states, nn_string)
        -           n_states += 1
        -           states[nn_string] = n_states
        -       end
        -       dpm.K_hist[iteration] = states[nn_string]
        -       dpm.K_zz_dict[states[nn_string]] = deepcopy(zz)
        -       elapsed_time = toq()
        -   end
        -   return states
        - end
        - 
        - 
        - function posterior{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, K::Int64, K_truncation::Int64=0)
        2   n_components = 0
        1   if K_truncation == 0
        1       n_components = K
        -   else
        0       n_components = K_truncation
        -   end
        - 
        1   bayesian_components = [deepcopy(dpm.bayesian_component) for kk=1:n_components]
        1   zz = dpm.K_zz_dict[K]
        - 
        1   NN = length(xx)
        1   nn = zeros(Int64, n_components)
        - 
        1   for ii = 1:NN
     5000       kk = zz[ii]
     5000       additem(bayesian_components[kk], xx[ii])
     5000       nn[kk] += 1
        -   end
        - 
        1   return([posterior(bayesian_components[kk]) for kk=1:n_components], nn)
        - end
        - 
and here is the allocation file (.mem), which I reproduce in full at the end of this post together with the two Cov/Mem line pairs that puzzle me most: a line that executes only twice is blamed for 191688 bytes, and a loop header that executes 352 times is blamed for 28000000 bytes. How should I interpret these numbers?


The answer is simply what the documentation mentions: "Under the user setting, the first line of any function directly called from the REPL will exhibit allocation due to events that happen in the REPL code itself." This is probably also relevant: "More significantly, JIT-compilation also adds to allocation counts, because much of Julia's compiler is written in Julia (and compilation usually requires memory allocation). The recommended procedure is to force compilation by executing all the commands you want to analyze, then call Profile.clear_malloc_data() to reset all allocation counters."


In one sentence: the first line gets blamed for allocations that happen elsewhere, because it is the first line at which allocation reporting starts again.
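A minimal sketch of that recommended procedure, assuming run_sampler() stands in for whatever call you want to measure:

    # started as: julia --track-allocation=user
    run_sampler()                  # first call: triggers JIT compilation, which pollutes the counters
    Profile.clear_malloc_data()    # reset every per-line allocation counter to zero
    run_sampler()                  # second call: only genuine runtime allocations are recorded
    # quit Julia normally; the *.mem files are written on exit

Coverage.jl can then be used to rank the results: its analyze_malloc function reads the *.mem files under a directory and sorts the tracked lines by bytes allocated.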

What is the value of NN? I would suggest first using the profiler to identify the slow parts before worrying about memory allocation.

I did use the profiler, and NN = length(xx) is not the bottleneck. xx is a Vector{Float64}, so NN is just an integer, equal to 5000 in this experiment.
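For reference, a minimal sketch of such a profiling pass (modern syntax; run_sampler() is again a hypothetical stand-in for the actual call):

    using Profile
    run_sampler()              # warm-up call so that JIT compilation does not show up in the samples
    @profile run_sampler()     # collect stack samples while the sampler runs
    Profile.print()            # per-line sample counts show where the time is actually spent

Here is the full .mem file referred to above: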
        - #=
        - DPM
        - 
        - Dirichlet Process Mixture Models
        - 
        - 25/08/2015
        - Adham Beyki, odinay@gmail.com
        - 
        - =#
        - 
        - type DPM{T}
        -   bayesian_component::T
        -   K::Int64
        -   aa::Float64
        -   a1::Float64
        -   a2::Float64
        -   K_hist::Vector{Int64}
        -   K_zz_dict::Dict{Int64, Vector{Int64}}
        - 
        -   DPM{T}(c::T, K::Int64, aa::Float64, a1::Float64, a2::Float64) = new(c, K, aa, a1, a2,
        -           Int64[], (Int64 => Vector{Int64})[])
        - end
        0 DPM{T}(c::T, K::Int64, aa::Real, a1::Real, a2::Real) = DPM{typeof(c)}(c, K, convert(Float64, aa),
        -           convert(Float64, a1), convert(Float64, a2))
        - 
        - function Base.show(io::IO, dpm::DPM)
        -   println(io, "Dirichlet Mixture Model with $(dpm.K) $(typeof(dpm.bayesian_component)) components")
        - end
        - 
        - function initialize_gibbs_sampler!(dpm::DPM, zz::Vector{Int64})
        -   # populates the cluster labels randomly
        0   zz[:] = rand(1:dpm.K, length(zz))
        - end
        - 
        - function DPM_sample_hyperparam(aa::Float64, a1::Float64, a2::Float64, K::Int64, NN::Int64, iters::Int64)
        - 
        -   # resampling concentration parameter based on Escobar and West 1995
        0   for n = 1:iters
        0       eta = rand(Distributions.Beta(aa+1, NN))
        0       rr = (a1+K-1) / (NN*(a2-log(NN)))
        0       pi_eta = rr / (1+rr)
        - 
        0       if rand() < pi_eta
        0           aa = rand(Distributions.Gamma(a1+K, 1/(a2-log(eta))))
        -       else
        0           aa = rand(Distributions.Gamma(a1+K-1, 1/(a2-log(eta))))
        -       end
        -   end
        0   aa
        - end
        - 
        - function DPM_sample_pp{T1, T2}(
        -     bayesian_components::Vector{T1},
        -     xx::T2,
        -     nn::Vector{Float64},
        -     pp::Vector{Float64},
        -     aa::Float64)
        - 
        0   K = length(nn)
        0   @inbounds for kk = 1:K
        0     pp[kk] = log(nn[kk]) + logpredictive(bayesian_components[kk], xx)
        -   end
        0   pp[K+1] = log(aa) + logpredictive(bayesian_components[K+1], xx)
        0   normalize_pp!(pp, K+1)
        0   return sample(pp[1:K+1])
        - end
        - 
        - 
        - function collapsed_gibbs_sampler!{T1, T2}(
        -       dpm::DPM{T1},
        -       xx::Vector{T2},
        -       zz::Vector{Int64},
        -       n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64; max_clusters::Int64=100)
        - 
        - 
   191688   NN = length(xx)                                          # number of data points
       96   nn = zeros(Float64, dpm.K)                       # count array
        0   n_iterations = n_burnins + (n_samples)*(n_lags+1)
      384   bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:dpm.K+1]
     2864   dpm.K_hist = zeros(Int64, n_iterations)
      176   pp = zeros(Float64, max_clusters)
        - 
       48   tic()
        0   for ii = 1:NN
        0       kk = zz[ii]
        0       additem(bayesian_components[kk], xx[ii])
        0       nn[kk] += 1
        -   end
        0   dpm.K_hist[1] = dpm.K
        0   elapsed_time = toq()
        - 
        0   for iteration = 1:n_iterations
        - 
  5329296       println("iteration: $iteration, KK: $(dpm.K), KK mode: $(indmax(hist(dpm.K_hist,
        -                       0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time")
        - 
    16800       tic()
 28000000       @inbounds for ii = 1:NN
        0           kk = zz[ii]
        0           nn[kk] -= 1
        0           delitem(bayesian_components[kk], xx[ii])
        - 
        -           # remove the cluster if empty
        0           if nn[kk] == 0
   161880               println("\tcomponent $kk has become inactive")
        0               splice!(nn, kk)
        0               splice!(bayesian_components, kk)
        0               dpm.K -= 1
        - 
        -               # shifting the labels one cluster back
    69032               idx = find(x -> x>kk, zz)
    42944               zz[idx] -= 1
        -           end
        - 
        0           kk = DPM_sample_pp(bayesian_components, xx[ii], nn, pp, dpm.aa)
        - 
        0           if kk == dpm.K+1
   158976               println("\tcomponent $kk activated.")
    14144               push!(bayesian_components, deepcopy(dpm.bayesian_component))
     4872               push!(nn, 0)
        0               dpm.K += 1
        -           end
        - 
        0           zz[ii] = kk
        0           nn[kk] += 1
        0           additem(bayesian_components[kk], xx[ii])
        -       end
        - 
        0       dpm.aa = DPM_sample_hyperparam(dpm.aa, dpm.a1, dpm.a2, dpm.K, NN, n_internals)
        0       dpm.K_hist[iteration] = dpm.K
 14140000       dpm.K_zz_dict[dpm.K] = deepcopy(zz)
        0       elapsed_time = toq()
        -   end
        - end
        - 
        - function truncated_gibbs_sampler{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, zz::Vector{Int64},
        -   n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64, K_truncation::Int64)
        - 
        -   NN = length(xx)                                             # number of data points
        -   nn = zeros(Int64, K_truncation)             # count array
        -   bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:K_truncation]
        -   n_iterations = n_burnins + (n_samples)*(n_lags+1)
        -   dpm.K_hist = zeros(Int64, n_iterations)
        -   states = (ASCIIString => Int64)[]
        -   n_states = 0
        - 
        -   tic()
        -   for ii = 1:NN
        -       kk = zz[ii]
        -       additem(bayesian_components[kk], xx[ii])
        -       nn[kk] += 1
        -   end
        -   dpm.K_hist[1] = dpm.K
        - 
        -   # constructing the sticks
        -   beta_VV = rand(Distributions.Beta(1.0, dpm.aa), K_truncation)
        -   beta_VV[end] = 1.0
        -   π = ones(Float64, K_truncation)
        -   π[2:end] = 1 - beta_VV[1:K_truncation-1]
        -   π = log(beta_VV) + log(cumprod(π))
        - 
        -   elapsed_time = toq()
        - 
        -   for iteration = 1:n_iterations
        - 
        -       println("iteration: $iteration, # active components: $(length(findn(nn)[1])), mode: $(indmax(hist(dpm.K_hist,
        -                       0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time \n", nn)
        - 
        -       tic()
        -       for ii = 1:NN
        -           kk = zz[ii]
        -           nn[kk] -= 1
        -           delitem(bayesian_components[kk], xx[ii])
        - 
        -           # resampling label
        -           pp = zeros(Float64, K_truncation)
        -           for kk = 1:K_truncation
        -               pp[kk] = π[kk] + logpredictive(bayesian_components[kk], xx[ii])
        -           end
        -           pp = exp(pp - maximum(pp))
        -           pp /= sum(pp)
        - 
        -           # sample from pp
        -           kk = sampleindex(pp)
        -           zz[ii] = kk
        -           nn[kk] += 1
        -           additem(bayesian_components[kk], xx[ii])
        - 
        -           for kk = 1:K_truncation-1
        -               gamma1 = 1 + nn[kk]
        -               gamma2 = dpm.aa + sum(nn[kk+1:end])
        -               beta_VV[kk] = rand(Distributions.Beta(gamma1, gamma2))
        -           end
        -           beta_VV[end] = 1.0
        -           π = ones(Float64, K_truncation)
        -           π[2:end] = 1 - beta_VV[1:K_truncation-1]
        -           π = log(beta_VV) + log(cumprod(π))
        - 
        -           # resampling concentration parameter based on Escobar and West 1995
        -           for internal_iters = 1:n_internals
        -                   eta = rand(Distributions.Beta(dpm.aa+1, NN))
        -               rr = (dpm.a1+dpm.K-1) / (NN*(dpm.a2-log(NN)))
        -               pi_eta = rr / (1+rr)
        - 
        -               if rand() < pi_eta
        -                   dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K, 1/(dpm.a2-log(eta))))
        -               else
        -                   dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K-1, 1/(dpm.a2-log(eta))))
        -               end
        -           end
        -       end
        - 
        -       nn_string = nn2string(nn)
        -       if !haskey(states, nn_string)
        -           n_states += 1
        -           states[nn_string] = n_states
        -       end
        -       dpm.K_hist[iteration] = states[nn_string]
        -       dpm.K_zz_dict[states[nn_string]] = deepcopy(zz)
        -       elapsed_time = toq()
        -   end
        -   return states
        - end
        - 
        - 
        - function posterior{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, K::Int64, K_truncation::Int64=0)
        0   n_components = 0
        0   if K_truncation == 0
        0       n_components = K
        -   else
        0       n_components = K_truncation
        -   end
        - 
        0   bayesian_components = [deepcopy(dpm.bayesian_component) for kk=1:n_components]
        0   zz = dpm.K_zz_dict[K]
        - 
        0   NN = length(xx)
        0   nn = zeros(Int64, n_components)
        - 
        0   for ii = 1:NN
        0       kk = zz[ii]
        0       additem(bayesian_components[kk], xx[ii])
        0       nn[kk] += 1
        -   end
        - 
        0   return([posterior(bayesian_components[kk]) for kk=1:n_components], nn)
        - end
        - 
These are the two Cov/Mem pairs mentioned in the question:

Cov:

        2   NN = length(xx)                  # number of data points

Mem:

   191688   NN = length(xx)              # number of data points

or this one, which is even worse:

Cov:

      352       @inbounds for ii = 1:NN

Mem:

 28000000      @inbounds for ii = 1:NN