Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/299.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/loops/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
加速Python中的集成函数_Python_Loops_Numpy_Optimization - Fatal编程技术网

加速Python中的积分函数

加速Python中的集成函数,python,loops,numpy,optimization,Python,Loops,Numpy,Optimization,我有一个函数,它是某个更大问题的内环。因此,它将被称为数百万次。我试着优化它。但由于这是我的第一个数字项目,我想知道是否有其他方法可以提高速度 赛顿似乎帮不了什么忙。也许numpy已经接近c了。 或者我没有有效地编写cython代码 将numpy导入为np 输入数学 将numexpr作为ne导入 par_mu_rho=0.8 par_alpha_rho=0.7 #“前两个是mus和the的平均值” #“后两个是阿尔法的平均值。” cov_epsilon=[[1,par_mu_rho],[par_

我有一个函数，它是某个更大问题的内层循环，因此会被调用数百万次。我已经尝试对它进行优化，但由于这是我的第一个数值计算项目，我想知道是否还有其他方法可以提高速度。

Cython 似乎帮不上什么忙。也许 numpy 已经接近 C 的速度了，或者是我的 Cython 代码写得不够高效。

import math

import numexpr as ne
import numpy as np

par_mu_rho = 0.8
par_alpha_rho = 0.7
# The first two entries are the means of the mus,
# the last two entries are the means of the alphas.
cov_epsilon = [[1, par_mu_rho], [par_mu_rho, 1]]
cov_nu = [[1, par_alpha_rho], [par_alpha_rho, 1]]
nrows = 10000
np.random.seed(123)
epsilon_sim = np.random.multivariate_normal([0, 0], cov_epsilon, nrows)
nu_sim = np.random.multivariate_normal([0, 0], cov_nu, nrows)
errors = np.concatenate((epsilon_sim, nu_sim), axis=1)
errors = np.exp(errors)


# The function to be optimized.
def mktout(mean_mu_alpha, errors, par_gamma):
    """Return the row-averages of t1, t2, p1 and p2.

    Args:
        mean_mu_alpha: Sequence of four numbers; ``exp`` of entry ``k``
            scales column ``k`` of ``errors``.
        errors: 2-D array with at least four columns.
        par_gamma: ``exp(par_gamma)`` scales mu10/mu20 into mu11/mu21.

    Returns:
        Tuple ``(mean t1, mean t2, mean p1, mean p2)`` taken over the rows
        of ``errors``.
    """
    mu10 = errors[:, 0] * math.exp(mean_mu_alpha[0])
    mu11 = math.exp(par_gamma) * mu10  # mu with gamma
    mu20 = errors[:, 1] * math.exp(mean_mu_alpha[1])
    mu21 = math.exp(par_gamma) * mu20
    alpha1 = errors[:, 2] * math.exp(mean_mu_alpha[2])
    alpha2 = errors[:, 3] * math.exp(mean_mu_alpha[3])

    j_is_larger = (mu10 > mu20)
    j_is_smaller = ~j_is_larger
    threshold2 = (1 + mu10 * alpha1) / (168 + alpha1)
    threshold3 = (1 + mu20 * alpha2) / (168 + alpha2)

    case1 = j_is_larger * (mu10 < 1 / 168)
    case2 = j_is_larger * (mu21 >= threshold2)
    # Rows where j is larger but neither case1 nor case2 applies.
    case3 = j_is_larger ^ (case1 | case2)
    case4 = j_is_smaller * (mu20 < 1 / 168)
    case5 = j_is_smaller * (mu11 >= threshold3)
    # Rows where j is not larger but neither case4 nor case5 applies.
    case6 = j_is_smaller ^ (case4 | case5)

    # numexpr resolves the names inside the strings from the local variables.
    t0 = ne.evaluate(
        "case1*168+case2 * (168 + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2) +case3 / threshold2 +case4 * 168 +case5 * (168 + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2) + case6 / threshold3")
    # For some cases t1 would be 0 anyway, so they are omitted here.
    t1 = ne.evaluate(
        "case2 * (t0 * alpha1 * mu11 - alpha1) +case3 * (t0 * alpha1 * mu10 - alpha1) +case5 * (t0 * alpha1 * mu11 - alpha1)")
    # t2 is taken as the remainder of 168 instead of a per-case formula.
    t2 = 168 - t0 - t1

    p12 = case2 + case5
    p1 = case3 + p12
    p2 = case6 + p12

    # Divide by the actual number of rows rather than a hard-coded 10000,
    # so the averages stay correct when `errors` has a different length.
    n_obs = errors.shape[0]
    return t1.sum() / n_obs, t2.sum() / n_obs, p1.sum() / n_obs, p2.sum() / n_obs
%timeit mktout([-6, -6, -1, -1], errors, -0.7)
在一台2.2GHz i7的旧Mac上，该函数的运行时间约为200µs。

更新:

基于@CodeSurgeor和@GZ0的建议和代码,我决定使用以下代码

def mktout_full(double[:] mean_mu_alpha, double[:, ::1] errors, double par_gamma):
    """Return the row-averages of t1, t2, p1, p2 and p12 using nested branches.

    Args:
        mean_mu_alpha: 1-D double buffer with at least four entries; ``exp``
            of entry ``k`` scales column ``k`` of ``errors``.
        errors: C-contiguous 2-D double buffer with at least four columns.
        par_gamma: ``exp(par_gamma)`` scales mu10/mu20 into mu11/mu21.

    Returns:
        Tuple ``(t1_sum/n, t2_sum/n, p1_sum/n, p2_sum/n, p12_sum/n)`` where
        ``n`` is the number of rows of ``errors``.

    Raises:
        ValueError: if ``mean_mu_alpha`` has fewer than four entries or
            ``errors`` has fewer than four columns.
    """
    cdef:
        size_t i, n
        double[4] exp
        double exp_par_gamma
        double mu10, mu11, mu20, mu21
        double alpha1, alpha2
        double threshold2, threshold3
        double t0, t1
        double t1_sum, t2_sum, p1_sum, p2_sum, p12_sum
        double c

    # Indices 0..3 are used unconditionally below; with bounds checking
    # disabled a short input would be read out of bounds.
    if mean_mu_alpha.shape[0] < 4:
        raise ValueError("mean_mu_alpha must contain at least 4 values")
    if errors.shape[1] < 4:
        raise ValueError("errors must have at least 4 columns")

    # compute the exp outside of the loop
    n = errors.shape[0]
    exp[0] = cmath.exp(mean_mu_alpha[0])
    exp[1] = cmath.exp(mean_mu_alpha[1])
    exp[2] = cmath.exp(mean_mu_alpha[2])
    exp[3] = cmath.exp(mean_mu_alpha[3])
    exp_par_gamma = cmath.exp(par_gamma)
    c = 168.0

    t1_sum = 0.0
    t2_sum = 0.0
    p1_sum = 0.0
    p2_sum = 0.0
    p12_sum = 0.0

    for i in range(n):
        mu10 = errors[i, 0] * exp[0]
        mu20 = errors[i, 1] * exp[1]
        # The remaining per-row quantities are computed lazily, only in the
        # branch that actually needs them.
        if mu10 >= mu20:
            # Rows with mu10 < 1/c add nothing to any of the sums.
            if mu10 >= 1 / c:
                mu21 = exp_par_gamma * mu20
                alpha1 = errors[i, 2] * exp[2]
                alpha2 = errors[i, 3] * exp[3]
                threshold2 = (1 + mu10 * alpha1) / (c + alpha1)
                if mu21 >= threshold2:
                    mu11 = exp_par_gamma * mu10
                    t0 = (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2)
                    t1 = (t0 * alpha1 * mu11 - alpha1)
                    t1_sum += t1
                    t2_sum += c - t0 - t1
                    p1_sum += 1
                    p2_sum += 1
                    p12_sum += 1
                else:
                    t1_sum += ((1 / threshold2) * alpha1 * mu10 - alpha1)
                    p1_sum += 1
        else:
            # Rows with mu20 < 1/c add nothing to any of the sums.
            if mu20 >= 1 / c:
                mu11 = exp_par_gamma * mu10
                alpha1 = errors[i, 2] * exp[2]
                alpha2 = errors[i, 3] * exp[3]
                threshold3 = (1 + mu20 * alpha2) / (c + alpha2)
                if mu11 >= threshold3:
                    mu21 = exp_par_gamma * mu20
                    t0 = (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2)
                    t1 = (t0 * alpha1 * mu11 - alpha1)
                    t1_sum += t1
                    t2_sum += c - t0 - t1
                    p1_sum += 1
                    p2_sum += 1
                    p12_sum += 1
                else:
                    t2_sum += ((1 / threshold3) * alpha2 * mu20 - alpha2)
                    p2_sum += 1

    return t1_sum / n, t2_sum / n, p1_sum / n, p2_sum / n, p12_sum / n
我的原始代码运行时间约为650µs。@CodeSurgeon 提供的 mktout 和 mktout_if 运行时间分别约为220µs和120µs。
from libc cimport math as cmath
from libc.stdint cimport *
from libc.stdlib cimport *

def mktout(list mean_mu_alpha, double[:, ::1] errors, double par_gamma):
    """Return the row-averages of t1, t2, p1 and p2 over ``errors``.

    Args:
        mean_mu_alpha: List with at least four numbers; ``exp`` of entry
            ``k`` scales column ``k`` of ``errors``.
        errors: C-contiguous 2-D double buffer with at least four columns.
        par_gamma: ``exp(par_gamma)`` scales mu10/mu20 into mu11/mu21.

    Returns:
        Tuple ``(t1_sum/n, t2_sum/n, p1_sum/n, p2_sum/n)`` where ``n`` is
        the number of rows of ``errors``.

    Raises:
        ValueError: if ``mean_mu_alpha`` has fewer than four entries or
            ``errors`` has fewer than four columns.
    """
    cdef:
        size_t i, n
        double[4] exp
        double exp_par_gamma
        double mu10, mu11, mu20, mu21
        double alpha1, alpha2
        bint j_is_larger, j_is_smaller
        double threshold2, threshold3
        bint case1, case2, case3, case4, case5, case6
        double ratio_both
        double t0, t1, t2
        double p12, p1, p2
        double t1_sum, t2_sum, p1_sum, p2_sum
        double c, inv_c

    # Indices 0..3 are used unconditionally below; when the module is built
    # with boundscheck disabled a short input would be read out of bounds.
    if len(mean_mu_alpha) < 4:
        raise ValueError("mean_mu_alpha must contain at least 4 values")
    if errors.shape[1] < 4:
        raise ValueError("errors must have at least 4 columns")

    # compute the exp outside of the loop
    n = errors.shape[0]
    exp[0] = cmath.exp(<double>mean_mu_alpha[0])
    exp[1] = cmath.exp(<double>mean_mu_alpha[1])
    exp[2] = cmath.exp(<double>mean_mu_alpha[2])
    exp[3] = cmath.exp(<double>mean_mu_alpha[3])
    exp_par_gamma = cmath.exp(par_gamma)
    c = 168.0
    inv_c = 1 / c

    t1_sum = 0.0
    t2_sum = 0.0
    p1_sum = 0.0
    p2_sum = 0.0

    for i in range(n):
        mu10 = errors[i, 0] * exp[0]
        mu11 = exp_par_gamma * mu10
        mu20 = errors[i, 1] * exp[1]
        mu21 = exp_par_gamma * mu20
        alpha1 = errors[i, 2] * exp[2]
        alpha2 = errors[i, 3] * exp[3]

        j_is_larger = mu10 > mu20
        j_is_smaller = not j_is_larger
        threshold2 = (1 + mu10 * alpha1) / (c + alpha1)
        threshold3 = (1 + mu20 * alpha2) / (c + alpha2)

        case1 = j_is_larger * (mu10 < inv_c)
        case2 = j_is_larger * (mu21 >= threshold2)
        # j is larger but neither case1 nor case2 applies
        case3 = j_is_larger ^ (case1 | case2)
        case4 = j_is_smaller * (mu20 < inv_c)
        case5 = j_is_smaller * (mu11 >= threshold3)
        # j is not larger but neither case4 nor case5 applies
        case6 = j_is_smaller ^ (case4 | case5)

        # case2 and case5 share the same quotient, so compute it once
        ratio_both = (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2)
        t0 = (case1 * c + case2 * ratio_both + case3 / threshold2
              + case4 * c + case5 * ratio_both + case6 / threshold3)
        t1 = (case2 * (t0 * alpha1 * mu11 - alpha1)
              + case3 * (t0 * alpha1 * mu10 - alpha1)
              + case5 * (t0 * alpha1 * mu11 - alpha1))
        t2 = c - t0 - t1

        p12 = case2 + case5
        p1 = case3 + p12
        p2 = case6 + p12

        t1_sum += t1
        t2_sum += t2
        p1_sum += p1
        p2_sum += p2

    return t1_sum/n, t2_sum/n, p1_sum/n, p2_sum/n
from libc cimport math as cmath
from libc.stdint cimport *
from libc.stdlib cimport *
from cython.parallel cimport prange

def mktout(list mean_mu_alpha, double[:, ::1] errors, double par_gamma):
    """Return the row-averages of t1, t2, p1 and p2 over ``errors``.

    Args:
        mean_mu_alpha: List with at least four numbers; ``exp`` of entry
            ``k`` scales column ``k`` of ``errors``.
        errors: C-contiguous 2-D double buffer with at least four columns.
        par_gamma: ``exp(par_gamma)`` scales mu10/mu20 into mu11/mu21.

    Returns:
        Tuple ``(t1_sum/n, t2_sum/n, p1_sum/n, p2_sum/n)`` where ``n`` is
        the number of rows of ``errors``.

    Raises:
        ValueError: if ``mean_mu_alpha`` has fewer than four entries or
            ``errors`` has fewer than four columns.
    """
    cdef:
        size_t i, n
        double[4] exp
        double exp_par_gamma
        double mu10, mu11, mu20, mu21
        double alpha1, alpha2
        bint j_is_larger, j_is_smaller
        double threshold2, threshold3
        bint case1, case2, case3, case4, case5, case6
        double ratio_both
        double t0, t1, t2
        double p12, p1, p2
        double t1_sum, t2_sum, p1_sum, p2_sum
        double c, inv_c

    # Indices 0..3 are used unconditionally below; when the module is built
    # with boundscheck disabled a short input would be read out of bounds.
    if len(mean_mu_alpha) < 4:
        raise ValueError("mean_mu_alpha must contain at least 4 values")
    if errors.shape[1] < 4:
        raise ValueError("errors must have at least 4 columns")

    # compute the exp outside of the loop
    n = errors.shape[0]
    exp[0] = cmath.exp(<double>mean_mu_alpha[0])
    exp[1] = cmath.exp(<double>mean_mu_alpha[1])
    exp[2] = cmath.exp(<double>mean_mu_alpha[2])
    exp[3] = cmath.exp(<double>mean_mu_alpha[3])
    exp_par_gamma = cmath.exp(par_gamma)
    c = 168.0
    inv_c = 1 / c

    t1_sum = 0.0
    t2_sum = 0.0
    p1_sum = 0.0
    p2_sum = 0.0

    for i in range(n):
        mu10 = errors[i, 0] * exp[0]
        mu11 = exp_par_gamma * mu10
        mu20 = errors[i, 1] * exp[1]
        mu21 = exp_par_gamma * mu20
        alpha1 = errors[i, 2] * exp[2]
        alpha2 = errors[i, 3] * exp[3]

        j_is_larger = mu10 > mu20
        j_is_smaller = not j_is_larger
        threshold2 = (1 + mu10 * alpha1) / (c + alpha1)
        threshold3 = (1 + mu20 * alpha2) / (c + alpha2)

        case1 = j_is_larger * (mu10 < inv_c)
        case2 = j_is_larger * (mu21 >= threshold2)
        # j is larger but neither case1 nor case2 applies
        case3 = j_is_larger ^ (case1 | case2)
        case4 = j_is_smaller * (mu20 < inv_c)
        case5 = j_is_smaller * (mu11 >= threshold3)
        # j is not larger but neither case4 nor case5 applies
        case6 = j_is_smaller ^ (case4 | case5)

        # case2 and case5 share the same quotient, so compute it once
        ratio_both = (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2)
        t0 = (case1 * c + case2 * ratio_both + case3 / threshold2
              + case4 * c + case5 * ratio_both + case6 / threshold3)
        t1 = (case2 * (t0 * alpha1 * mu11 - alpha1)
              + case3 * (t0 * alpha1 * mu10 - alpha1)
              + case5 * (t0 * alpha1 * mu11 - alpha1))
        t2 = c - t0 - t1

        p12 = case2 + case5
        p1 = case3 + p12
        p2 = case6 + p12

        t1_sum += t1
        t2_sum += t2
        p1_sum += p1
        p2_sum += p2

    return t1_sum/n, t2_sum/n, p1_sum/n, p2_sum/n

# Plain C struct of four doubles. In this file it is used both for the four
# input means (filled from mean_mu_alpha[0..3] in outer_loop) and for the
# four averaged results written by cy_mktout (a..d = t1, t2, p1, p2 means).
ctypedef struct Vec4:
    double a
    double b
    double c
    double d

def outer_loop(list mean_mu_alpha, double[:, ::1] errors, double par_gamma, size_t n):
    """Run ``cy_mktout`` ``n`` times inside a ``prange`` loop.

    Args:
        mean_mu_alpha: List with at least four numbers, copied into a
            ``Vec4`` before the GIL is released.
        errors: C-contiguous 2-D double buffer with at least four columns.
        par_gamma: Passed through unchanged to ``cy_mktout``.
        n: Number of times ``cy_mktout`` is invoked.

    Returns:
        The ``Vec4`` written by ``cy_mktout`` (Cython hands a struct back to
        Python as a mapping of its fields). All fields are 0.0 when
        ``n == 0``.

    Raises:
        ValueError: if ``mean_mu_alpha`` has fewer than four entries or
            ``errors`` has fewer than four columns.
    """
    cdef:
        size_t i
        Vec4 mean_vec
        Vec4 out

    # cy_mktout indexes columns 0..3 without any checks, so validate here
    # while the GIL is still held and an exception can be raised.
    if len(mean_mu_alpha) < 4:
        raise ValueError("mean_mu_alpha must contain at least 4 values")
    if errors.shape[1] < 4:
        raise ValueError("errors must have at least 4 columns")

    # Give `out` defined contents in case the loop body never runs (n == 0);
    # previously an uninitialised struct was returned in that case.
    out.a = 0.0
    out.b = 0.0
    out.c = 0.0
    out.d = 0.0

    mean_vec.a = <double>(mean_mu_alpha[0])
    mean_vec.b = <double>(mean_mu_alpha[1])
    mean_vec.c = <double>(mean_mu_alpha[2])
    mean_vec.d = <double>(mean_mu_alpha[3])

    with nogil:
        for i in prange(n):
            # NOTE(review): every iteration passes the same inputs and writes
            # into the same shared `out`, so the stored values are identical
            # across iterations; use per-iteration outputs if the inputs ever
            # start to vary with `i`.
            cy_mktout(&out, &mean_vec, errors, par_gamma)
    return out

cdef void cy_mktout(Vec4 *out, Vec4 *mean_mu_alpha, double[:, ::1] errors, double par_gamma) nogil:
    # nogil kernel: accumulates t1, t2, p1 and p2 over every row of `errors`
    # and stores their averages in out.a, out.b, out.c and out.d.
    #
    # mean_mu_alpha.a..d are exponentiated once and scale columns 0..3 of
    # `errors`; exp(par_gamma) additionally scales the two mu terms.
    cdef:
        size_t row, n_rows
        double[4] scale
        double gamma_scale
        double mu10, mu11, mu20, mu21
        double alpha1, alpha2
        bint first_larger, first_not_larger
        double threshold2, threshold3
        bint case1, case2, case3, case4, case5, case6
        double t0, t1
        double acc_t1, acc_t2, acc_p1, acc_p2
        double c, inv_c

    c = 168.0
    inv_c = 1 / c
    n_rows = errors.shape[0]

    # loop-invariant exponentials
    gamma_scale = cmath.exp(par_gamma)
    scale[0] = cmath.exp(mean_mu_alpha.a)
    scale[1] = cmath.exp(mean_mu_alpha.b)
    scale[2] = cmath.exp(mean_mu_alpha.c)
    scale[3] = cmath.exp(mean_mu_alpha.d)

    acc_t1 = 0.0
    acc_t2 = 0.0
    acc_p1 = 0.0
    acc_p2 = 0.0

    for row in range(n_rows):
        mu10 = errors[row, 0] * scale[0]
        mu20 = errors[row, 1] * scale[1]
        alpha1 = errors[row, 2] * scale[2]
        alpha2 = errors[row, 3] * scale[3]
        mu11 = gamma_scale * mu10
        mu21 = gamma_scale * mu20

        threshold2 = (1 + mu10 * alpha1) / (c + alpha1)
        threshold3 = (1 + mu20 * alpha2) / (c + alpha2)

        # Six indicator flags; case3/case6 are the "none of the above"
        # remainder of their respective side.
        first_larger = mu10 > mu20
        first_not_larger = not first_larger
        case1 = first_larger and mu10 < inv_c
        case2 = first_larger and mu21 >= threshold2
        case3 = first_larger and not (case1 or case2)
        case4 = first_not_larger and mu20 < inv_c
        case5 = first_not_larger and mu11 >= threshold3
        case6 = first_not_larger and not (case4 or case5)

        t0 = (case1 * c
              + case2 * (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2)
              + case3 / threshold2
              + case4 * c
              + case5 * (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2)
              + case6 / threshold3)
        t1 = (case2 * (t0 * alpha1 * mu11 - alpha1)
              + case3 * (t0 * alpha1 * mu10 - alpha1)
              + case5 * (t0 * alpha1 * mu11 - alpha1))

        acc_t1 += t1
        acc_t2 += c - t0 - t1
        acc_p1 += case3 + (case2 + case5)
        acc_p2 += case6 + (case2 + case5)

    out.a = acc_t1 / n_rows
    out.b = acc_t2 / n_rows
    out.c = acc_p1 / n_rows
    out.d = acc_p2 / n_rows
from distutils.core import setup
from Cython.Build import cythonize
from distutils.core import Extension
import numpy as np
import os
import shutil
import platform

# Extra libraries to link, keyed by the string returned by platform.system().
libraries = dict(Linux=[], Windows=[])

# Source language passed to every Extension.
language = "c"

# Compiler and linker flags (optimisation, fast-math and OpenMP).
args = "-w -std=c11 -O3 -ffast-math -march=native -fopenmp".split()
link_args = "-std=c11 -fopenmp".split()

# Ask cythonize to emit the HTML annotation report.
annotate = True

# Cython compiler directives applied to every extension: the run-time safety
# checks are switched off and C division semantics are used.
# Optional string directives left disabled by the author:
#   c_string_type="unicode", c_string_encoding="utf-8"
directives = dict(
    binding=True,
    boundscheck=False,
    wraparound=False,
    initializedcheck=False,
    cdivision=True,
    nonecheck=False,
    language_level="3",
)

def module_name_from_path(ext_path):
    """Return the dotted module name for a ``.pyx`` path relative to the cwd.

    ``./pkg/mod.pyx`` becomes ``pkg.mod``. The path is normalised first so
    the result does not depend on which separator ``os.walk`` produced.
    """
    stem, _ = os.path.splitext(os.path.normpath(ext_path))
    return stem.replace(os.sep, ".")


if __name__ == "__main__":
    system = platform.system()
    # Platforms without an entry in `libraries` (e.g. "Darwin") link no
    # extra libraries instead of aborting with a KeyError.
    libs = libraries.get(system, [])
    extensions = []

    # create one Extension per .pyx file found below the current directory
    for path, dirs, file_names in os.walk("."):
        for file_name in file_names:
            # require the dot so names merely ending in "pyx" are skipped
            if not file_name.endswith(".pyx"):
                continue
            ext_path = os.path.join(path, file_name)
            extensions.append(
                Extension(
                    name=module_name_from_path(ext_path),
                    sources=[ext_path],
                    libraries=libs,
                    language=language,
                    extra_compile_args=args,
                    extra_link_args=link_args,
                    include_dirs=[np.get_include()],
                )
            )

    # setup all extensions
    ext_modules = cythonize(
        extensions,
        annotate=annotate,
        compiler_directives=directives,
    )
    setup(ext_modules=ext_modules)

    # Optional clean-up, left disabled by the author: remove the build
    # directory right after building.
    #     build_dir = "./build"
    #     if os.path.exists(build_dir):
    #         shutil.rmtree(build_dir)
cdef void cy_mktout_if(Vec4 *out, Vec4 *mean_mu_alpha, double[:, ::1] errors, double par_gamma) nogil:
    # Branching variant of the kernel: each row is handled by one of two
    # branches depending on whether mu10 > mu20, so only the three case
    # flags of that side are evaluated. The averages of t1, t2, p1 and p2
    # over the rows of `errors` are stored in out.a, out.b, out.c and out.d.
    #
    # mean_mu_alpha.a..d are exponentiated once and scale columns 0..3 of
    # `errors`; exp(par_gamma) additionally scales the two mu terms.
    cdef:
        size_t i, n
        double[4] exp
        double exp_par_gamma
        double mu10, mu11, mu20, mu21
        double alpha1, alpha2
        double threshold2, threshold3
        bint case1, case2, case3, case4, case5, case6
        double t0, t1, t2
        double t1_sum, t2_sum, p1_sum, p2_sum
        double c

    #compute the exp outside of the loop
    n = errors.shape[0]
    exp[0] = cmath.exp(mean_mu_alpha.a)
    exp[1] = cmath.exp(mean_mu_alpha.b)
    exp[2] = cmath.exp(mean_mu_alpha.c)
    exp[3] = cmath.exp(mean_mu_alpha.d)
    exp_par_gamma = cmath.exp(par_gamma)
    c = 168.0

    t1_sum = 0.0
    t2_sum = 0.0
    p1_sum = 0.0
    p2_sum = 0.0

    for i in range(n):
        mu10 = errors[i, 0] * exp[0]
        mu11 = exp_par_gamma * mu10
        mu20 = errors[i, 1] * exp[1]
        mu21 = exp_par_gamma * mu20
        alpha1 = errors[i, 2] * exp[2]
        alpha2 = errors[i, 3] * exp[3]

        if mu10 > mu20:
            # threshold2 is only needed on this side
            threshold2 = (1 + mu10 * alpha1) / (c + alpha1)
            case1 = mu10 < 1 / c
            case2 = mu21 >= threshold2
            case3 = not (case1 | case2)

            t0 = case1*c + case2 * (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2) + case3 / threshold2
            t1 = case2 * (t0 * alpha1 * mu11 - alpha1) + case3 * (t0 * alpha1 * mu10 - alpha1)
            t2 = c - t0 - t1

            t1_sum += t1
            t2_sum += t2
            p1_sum += case2 + case3
            p2_sum += case2

        else:
            # threshold3 is only needed on this side
            threshold3 = (1 + mu20 * alpha2) / (c + alpha2)
            case4 = mu20 < 1 / c
            case5 = mu11 >= threshold3
            case6 = not (case4 | case5)

            t0 = case4 * c + case5 * (c + alpha1 + alpha2) / (1 + mu11 * alpha1 + mu21 * alpha2) + case6 / threshold3
            t1 = case5 * (t0 * alpha1 * mu11 - alpha1)
            t2 = c - t0 - t1

            t1_sum += t1
            t2_sum += t2
            p1_sum += case5
            p2_sum += case5 + case6

    out.a = t1_sum/n
    out.b = t2_sum/n
    out.c = p1_sum/n
    out.d = p2_sum/n
outer_loop: 0.5116949229995953 seconds
outer_loop_if: 0.617649456995423 seconds
mktout: 0.9221872320049442 seconds
mktout_if: 1.430276553001022 seconds
python: 10.116664300003322 seconds