Function 如何使用 Joblib 或其他并行计算方式优化这个 Python 函数?
使用此函数计算距离矩阵需要花费大量时间。如何优化它?我应该使用 Joblib 或其他并行计算方式吗?这段代码运行在一个机器学习(ML)项目的服务器上,执行此函数需要很长时间。希望得到优化这段 Python 代码的建议。
标签:function, optimization, parallel-processing, list-comprehension, joblib
def get_dtw_matrix(self, lotrunnums, target_lotrunnums=None, data_targets=None, data_clusters=None, data_train=None):
    """Build the symmetric pairwise DTW distance matrix for a set of traces.

    The traces compared are the union of three groups, keyed by label:
    the ones selected by ``lotrunnums``, the targets, and (optionally) the
    clusters. Later groups overwrite earlier ones on a label collision
    because they are merged with ``dict.update``.

    Args:
        lotrunnums: Labels passed to ``self.get_data_to_dict`` to select the
            traces to compare.
        target_lotrunnums: Labels passed to ``self.get_data_to_dict`` to
            select the target traces; only used when ``data_targets`` is None.
        data_targets: Optional mapping ``label -> trace`` of target traces.
        data_clusters: Optional mapping ``label -> trace`` of cluster traces.
        data_train: Optional preprocessed data; when None it is obtained from
            ``self.preprocess()``.

    Returns:
        A tuple ``(distance_matrix, indexes)`` where ``distance_matrix`` is a
        square ``pandas.DataFrame`` indexed by label on both axes (0 on the
        diagonal) and ``indexes`` is
        ``{"target": [...labels...], "cluster": [...labels...]}``.
    """
    if data_train is None:
        # preprocess() returns a pair; only the first element is used here.
        data_train, _ = self.preprocess()

    data = self.get_data_to_dict(data_train, lotrunnums)
    if data_targets is None:
        data_targets = self.get_data_to_dict(data_train, target_lotrunnums)

    targets = data_targets.keys()
    data.update(data_targets)

    clusters = []
    if data_clusters is not None:
        clusters = data_clusters.keys()
        data.update(data_clusters)

    labels = list(data)
    size = len(labels)

    # Fill plain nested lists and build the DataFrame once at the end:
    # per-cell ``DataFrame.loc`` assignment inside an O(n^2) loop is far
    # slower than list indexing. The diagonal is pre-filled with 0.
    cells = [[0] * size for _ in range(size)]

    for i, label_a in enumerate(labels):
        trace_a = data[label_a]
        # Invariant for the whole row, so evaluate it once per row.
        a_is_target = label_a in targets
        for j in range(i + 1, size):
            trace_b = data[labels[j]]
            # NOTE(review): a fresh DTW object is created per pair, as before;
            # hoisting it out of the loops is only safe if it keeps no state
            # between calls - confirm against DTWDistanceAMKPIV.
            distance_dtw = DTWDistanceAMKPIV(
                self.distance_tol,
                self.distance_lagmax,
                self.distancetype,
                self.remove_repeated,
            )
            forward = distance_dtw.distance_to_target(trace_b, trace_a)
            distance = float(forward[0]["distance"])
            if not a_is_target:
                # When the row label is not a target, take the smaller of the
                # two directed distances.
                backward = distance_dtw.distance_to_target(trace_a, trace_b)
                distance = min(distance, float(backward[0]["distance"]))
            cells[i][j] = distance
            cells[j][i] = distance

    # dtype=object keeps the dtype of a frame that starts empty and is filled
    # cell by cell, so callers see the same values and dtype as before.
    distance_matrix = pd.DataFrame(cells, index=labels, columns=labels, dtype=object)
    indexes = {"target": list(targets), "cluster": list(clusters)}
    return distance_matrix, indexes