Python/Numpy优化

Python/Numpy 优化,python,numpy,micro-optimization:我有一个大约 40 行计算的简单 Python 类(执行基于密度之间 L2 距离的独立性检验),即使只有 100 个点和 100 次 bootstrap,计算量也很大。下面是代码和一些测试数据。

我有一个简单的python类,大约有40行计算,随后给出了一个用例示例,它执行一个简单的计算(基于密度之间的L2距离的独立性测试),并且只需要100个点和100个boostrap就可以进行大量计算。下面是代码和一些测试数据:

import numpy as np

class IndependenceTesting:
    def __init__(self, data, a, b, dim_to_test, number_of_simulation=1000):
        # rescale the data :
        self.i = dim_to_test
        self.data = (data - a) / (b - a)
        self.d = self.data.shape[1]
        self.n = self.data.shape[0]
        self.N = number_of_simulation
        self.data_restricted = np.hstack((self.data[:, :(self.i - 1)], self.data[:, (self.i + 1):]))
        self.emp_cop_restricted = np.array([np.mean(np.array([np.sum(dat <= u) for dat in self.data_restricted]) == self.d - 1) for u in self.data_restricted])

    def simulated_dataset(self):
        unif = np.random.uniform(size=(self.n, 1))
        before = [x for x in range(self.d) if x < self.i]
        after = [x for x in range(self.d) if x > self.i]
        return np.hstack((self.data[:, before], unif, self.data[:, after]))
    def mse(self, data):
        emp_cop = np.array([np.mean(np.array([np.sum(dat <= u) for dat in data]) == self.d) for u in self.data])
        return ((emp_cop - self.emp_cop_restricted)**2).mean()

我认为 hstack 并不是代码性能下降的主要原因;如果只是要删除一列,可以改为测试 np.delete(data, self.i, axis=1)。

一般来说,Python 中的列表推导式速度较慢,向量化或编译后的实现效率更高。在你的特定情况下,np.array([np.sum(dat <= u) ...]) 这类嵌套列表推导式是主要瓶颈。我用 numba 和广播(broadcasting)各做了一个解决方案,正如另一个回答中提到的,代码必须稍作重组。numba 是否更快取决于实验的确切数量和数据的形状。我还做了一个变体,使用了 numba 不支持的稍微不同的广播方法(由于它向量化了双重循环而非单循环,因此称为 advanced)。这三种方法可以通过 'method' 关键字选择:

import numpy as np
import numba

@numba.njit(fastmath=True)
def emp_cop_numba(data, self_data, shape):
    """Empirical copula values of each row of `self_data` w.r.t. `data`.

    For every row u of `self_data`, computes the fraction of rows of
    `data` that are componentwise <= u.  `shape` is the (n_rows, n_cols)
    shape of the comparison sample.
    """
    emp_cop = np.zeros(shape[0])
    for i in range(shape[0]):
        u = self_data[i]
        # fraction of rows of `data` dominated by u (every coordinate <= u)
        emp_cop[i] = np.mean(np.sum(data <= u, axis=1) == shape[1])
        # NOTE: the original also did `i += 1` here; that statement was
        # dead code (the for-loop rebinds i each iteration) and is removed.
    return emp_cop

def emp_cop_simple_broadcasting(data, self_data, shape):
    """Empirical copula of `self_data` w.r.t. `data`, one broadcast per row.

    Entry idx is the fraction of rows of `data` that are componentwise
    <= self_data[idx]; `shape` is the (n_rows, n_cols) of the sample.
    """
    result = np.empty(shape[0])
    for idx in range(shape[0]):
        # rows of `data` whose every coordinate is <= self_data[idx]
        dominated = np.sum(data <= self_data[idx], axis=1) == shape[1]
        result[idx] = dominated.mean()
    return result

def emp_cop_advanced_broadcasting(data, self_data, shape):
    """Fully vectorised empirical copula: one 3-D broadcast compares
    every row of `data` against every row of `self_data` at once."""
    # pairwise[i, j] is True iff row j of `data` is componentwise <= row i
    # of `self_data`
    pairwise = np.sum(data[None, :, :] <= self_data[:, None, :], axis=2) == shape[1]
    return pairwise.mean(axis=1)

class IndependenceTesting:
    """Bootstrap independence test for one coordinate of a dataset,
    based on the L2 distance between empirical copulas."""

    def __init__(self, data, a, b, dim_to_test, number_of_simulation=1000, method='numba'):
        """
        data : (n, d) array of observations.
        a, b : per-column lower/upper bounds used to rescale to [0, 1].
        dim_to_test : 0-based index of the column whose independence is tested.
        number_of_simulation : number of bootstrap replications.
        method : 'numba', 'simple_broadcasting' or 'advanced_broadcasting'.
        """
        # rescale the data to the unit hypercube:
        self.i = dim_to_test
        self.data = (data - a) / (b - a)
        self.d = self.data.shape[1]
        self.n = self.data.shape[0]
        self.N = number_of_simulation
        self.method = method

        # BUG FIX: the original sliced [:, :(self.i - 1)], which drops
        # column i-1 instead of column i (and for i == 0 wraps to :-1,
        # duplicating columns).  simulated_dataset() treats self.i as the
        # tested column, so remove exactly that column here.
        self.data_restricted = np.delete(self.data, self.i, axis=1)
        # empirical copula of the restricted (d-1)-dimensional sample
        self.emp_cop_restricted = np.array(
            [np.mean(np.array([np.sum(dat <= u) for dat in self.data_restricted]) == self.d - 1)
             for u in self.data_restricted])

    def simulated_dataset(self):
        """Return the data with column i replaced by an independent
        uniform draw (a sample from the null hypothesis)."""
        unif = np.random.uniform(size=(self.n, 1))
        return np.hstack((self.data[:, :self.i], unif, self.data[:, self.i + 1:]))

    def mse(self, data):
        """Mean squared difference between the empirical copula of `data`
        and the restricted empirical copula."""
        if self.method == 'numba':
            emp_cop = emp_cop_numba(data, self.data, self.data.shape)
        elif self.method == 'simple_broadcasting':
            emp_cop = emp_cop_simple_broadcasting(data, self.data, self.data.shape)
        elif self.method == 'advanced_broadcasting':
            emp_cop = emp_cop_advanced_broadcasting(data, self.data, self.data.shape)
        else:
            # previously an unknown method fell through to an UnboundLocalError
            raise ValueError(f"unknown method: {self.method!r}")

        return ((emp_cop - self.emp_cop_restricted) ** 2).mean()

    def mse_distribution(self):
        """Bootstrap distribution of the statistic under independence."""
        return np.array([self.mse(self.simulated_dataset()) for _ in range(self.N)])

    def mse_observed(self):
        """Statistic evaluated on the observed data."""
        return self.mse(self.data)

    def quantile(self):
        """Rank of the observed statistic within the bootstrap distribution."""
        return np.mean(self.mse_distribution() < self.mse_observed())

    def p_value(self):
        """Bootstrap p-value of the independence test."""
        return 1 - self.quantile()

# Build a 4-column test sample: three random columns (the third is the
# reflection 1 - x of its own draw, still uniform) plus one independent
# uniform column appended on the right.
np.random.seed(42)
points = np.random.random((100, 3))
points[:, 2] = 1 - points[:, 2]
extra_column = np.random.uniform(size=points.shape[0]).reshape((points.shape[0], 1))
points = np.concatenate((points, extra_column), axis=1)

# Run the test for every column with each of the three implementations;
# the seed is reset per method so all three see identical bootstrap draws.
for method in ('numba', 'simple_broadcasting', 'advanced_broadcasting'):
    np.random.seed(42)
    p_values = []
    for dim in range(points.shape[1]):
        tester = IndependenceTesting(points, np.repeat(0, 4), np.repeat(1, 4),
                                     dim_to_test=dim, number_of_simulation=100,
                                     method=method)
        p_values.append(tester.p_value())
    print(p_values)

这是对 @user2653663 的 Numba 方法的补充说明。如果你想真正高效地使用 Numba,建议编写简单的显式循环;而在 Numpy 中,你会尽量避免任何显式循环。

示例

import numba as nb
import numpy as np

@numba.njit(parallel=True)
def emp_cop_numba_2(data):
    """Empirical copula of `data` against itself via three explicit
    loops; the outer loop is parallelised with prange."""
    n_rows = data.shape[0]
    n_cols = data.shape[1]
    emp_cop = np.empty(n_rows)
    for i in numba.prange(n_rows):
        dominated = 0
        for j in range(n_rows):
            below = 0
            for k in range(n_cols):
                if data[j, k] <= data[i, k]:
                    below += 1
            # row j counts only if every coordinate is <= row i
            if below == n_cols:
                dominated += 1
        emp_cop[i] = dominated / n_rows
    return emp_cop

根据
cProfile
,问题在于
mse
中的列表理解,而不是
hstack
%timeit emp_cop_numba(data, self_data, shape)
11.1 ms ± 517 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

%timeit emp_cop_simple_broadcasting(data, self_data, shape)
57.7 ms ± 441 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

%timeit emp_cop_advanced_broadcasting(data, self_data, shape)
41.6 ms ± 552 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
import numba as nb
import numpy as np

# NOTE(review): this snippet imports `numba as nb` but the decorator uses
# the bare name `numba` — that only works because `import numba` also
# appears earlier in the file; confirm which alias is intended.
@numba.njit(parallel=True)
def emp_cop_numba_2(data):
    """Empirical copula of `data` against itself, compiled with numba.

    For every row i, computes the fraction of rows j that are
    componentwise <= row i.  parallel=True lets prange distribute the
    outer loop across threads.
    """
    emp_cop=np.empty(data.shape[0])

    for i in numba.prange(data.shape[0]):
        count_2=0  # number of rows dominated by row i
        for j in range(data.shape[0]):
            count=0  # coordinates of row j that are <= row i
            for k in range(data.shape[1]):
                if data[j,k]<=data[i,k]:
                    count+=1
            if count==data.shape[1]:
                count_2+=1
        emp_cop[i]=count_2/data.shape[0]
    return emp_cop
# 1000x3 benchmark sample used by the %timeit runs below
points = np.random.rand(1000, 3)

#user2653663
%timeit res1=emp_cop_numba(points, points, points.shape)
#7.73 ms ± 69.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

#parallel=False, faster for very small arrays, but slower for larger ones
%timeit res2=emp_cop_numba_2(points)
#2.56 ms ± 36.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

#parallel=True
%timeit res=emp_cop_numba_2(points)
#487 µs ± 21.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)