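Numpy …_params) 249 if self._final_estimator is not None: --> 250 self._final_estimator.fit(Xt, y, **fit_params) 251 return self 252 /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in fit(self, X, y, **fit_params) 246 This estimator 247 """ --> 248 Xt, fit_params = self._fit(X, y, **fit_params) 249 if self._final_estimator is not None: 250 self._final_estimator.fit(Xt, y, **fit_params) /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in _fit(self, X, y, **fit_params) 211 Xt, fitted_transformer = fit_transform_one_cached( 212 cloned_transformer, None, Xt, y, --> 213 **fit_params_steps[name]) 214 # Replace the transformer of the step with the fitted 215 # transformer. This is necessary when loading the transformer /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/memory.pyc in __call__(self, *args, **kwargs) 360 361 def __call__(self, *args, **kwargs): --> 362 return self.func(*args, **kwargs) 363 364 def call_and_shelve(self, *args, **kwargs): /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in _fit_transform_one(transformer, weight, X, y, **fit_params) 579 **fit_params): 580 if hasattr(transformer, 'fit_transform'): --> 581 res = transformer.fit_transform(X, y, **fit_params) 582 else: 583 res = transformer.fit(X, y, **fit_params).transform(X) <ipython-input-108-dfcab4b62582> in fit_transform(self, X, y) 138 ''' 139 --> 140 self.fit(X, y) 141 return self.transform(X) <ipython-input-108-dfcab4b62582> in fit(self, X, y) 73 #Majority-negative features need to check df.quantile(1-q) 74 #in order to be using correct quantile value ---> 75 pos = np.percentile(X[:, self.feature_list], self.q * 100, axis = 0) 76 neg = np.percentile(X[:, self.feature_list], (1.0 - self.q) * 100, axis = 0) 77 IndexError: index 10 is out of bounds for axis 1 with size 10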
当您将管道发送到GridSearchCV时,Numpy …_params) 249 if self._final_estimator is not None: --> 250 self._final_estimator.fit(Xt, y, **fit_params) 251 return self 252 /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in fit(self, X, y, **fit_params) 246 This estimator 247 """ --> 248 Xt, fit_params = self._fit(X, y, **fit_params) 249 if self._final_estimator is not None: 250 self._final_estimator.fit(Xt, y, **fit_params) /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in _fit(self, X, y, **fit_params) 211 Xt, fitted_transformer = fit_transform_one_cached( 212 cloned_transformer, None, Xt, y, --> 213 **fit_params_steps[name]) 214 # Replace the transformer of the step with the fitted 215 # transformer. This is necessary when loading the transformer /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/memory.pyc in __call__(self, *args, **kwargs) 360 361 def __call__(self, *args, **kwargs): --> 362 return self.func(*args, **kwargs) 363 364 def call_and_shelve(self, *args, **kwargs): /Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in _fit_transform_one(transformer, weight, X, y, **fit_params) 579 **fit_params): 580 if hasattr(transformer, 'fit_transform'): --> 581 res = transformer.fit_transform(X, y, **fit_params) 582 else: 583 res = transformer.fit(X, y, **fit_params).transform(X) <ipython-input-108-dfcab4b62582> in fit_transform(self, X, y) 138 ''' 139 --> 140 self.fit(X, y) 141 return self.transform(X) <ipython-input-108-dfcab4b62582> in fit(self, X, y) 73 #Majority-negative features need to check df.quantile(1-q) 74 #in order to be using correct quantile value ---> 75 pos = np.percentile(X[:, self.feature_list], self.q * 100, axis = 0) 76 neg = np.percentile(X[:, self.feature_list], (1.0 - self.q) * 100, axis = 0) 77 IndexError: index 10 is out of bounds for axis 1 with size 10,numpy,machine-learning,scikit-learn,pipeline,Numpy,Machine Learning,Scikit Learn,Pipeline,当您将管道发送到GridSearchCV时,best_estimator_ 还包含管道对象(无论您是仅调整管道的单个部分还是所有部分) 所以当你这样做的时候: knn_pipe_tuned = Pipeline([('impute', imp), ('engineer',topQ), ('scale', scaler), ('select', selector), ('kNN', knn_gs.best_estimator_)]) 你基本上是这样做的: knn_pipe_tun
best_estimator_
还包含管道对象(无论您是仅调整管道的单个部分还是所有部分)
所以当你这样做的时候:
knn_pipe_tuned = Pipeline([('impute', imp), ('engineer',topQ), ('scale', scaler),
('select', selector), ('kNN', knn_gs.best_estimator_)])
你基本上是这样做的:
knn_pipe_tuned = Pipeline([('impute', imp), ('engineer',topQ), ('scale', scaler),
('select', selector), ('kNN', Pipeline([('impute', imp),
('engineer',topQ),
('scale', scaler),
('select', selector),
('kNN', knn)]))])
因此,这会对已经经过所有这些步骤的数据再次执行插补(impute)
、特征工程(engineer)
、缩放(scale)
和特征选择(select)。我确信这不是您想要的
执行交叉验证时,您只需执行以下操作:
knn_pipe_tuned = knn_gs.best_estimator_
哦!我完全忽略了这一点。我一直以为 best_estimator_
只是估计器本身,而不是整个管道,但你所说的完全有道理。既然代码现在运行顺利,产生的结果看起来量级也合理,我将此回答标记为已采纳。谢谢你的帮助!与您的问题只是部分相关:通过设置 knn_pipe_tuned = knn_gs
,或者直接使用 knn_scores = cross_validate(knn_gs, features, labels, groups=None, scoring=['precision', 'recall', 'f1'], cv=cv_1000)
,您可以进行嵌套交叉验证,从而获得无偏的交叉验证结果。
knn_pipe_tuned = Pipeline([('impute', imp), ('engineer',topQ), ('scale', scaler),
('select', selector), ('kNN', knn_gs.best_estimator_)])
cv_1000 = StratifiedShuffleSplit(n_splits=1000, test_size=0.2, random_state=42)
from sklearn.model_selection import cross_validate
knn_scores = cross_validate(knn_pipe_tuned, features, labels, groups=None,
scoring=['precision', 'recall', 'f1'], cv=cv_1000)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-147-4f04d5e63a0b> in <module>()
12 from sklearn.model_selection import cross_validate
13 knn_scores = cross_validate(knn_pipe_tuned, features, labels, groups=None,
---> 14 scoring=['precision', 'recall', 'f1'], cv=cv_1000)
15
16 knn_cv_results = pd.DataFrame(knn_scores)
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score)
204 fit_params, return_train_score=return_train_score,
205 return_times=True)
--> 206 for train, test in cv.split(X, y, groups))
207
208 if return_train_score:
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
456 estimator.fit(X_train, **fit_params)
457 else:
--> 458 estimator.fit(X_train, y_train, **fit_params)
459
460 except Exception as e:
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in fit(self, X, y, **fit_params)
248 Xt, fit_params = self._fit(X, y, **fit_params)
249 if self._final_estimator is not None:
--> 250 self._final_estimator.fit(Xt, y, **fit_params)
251 return self
252
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in fit(self, X, y, **fit_params)
246 This estimator
247 """
--> 248 Xt, fit_params = self._fit(X, y, **fit_params)
249 if self._final_estimator is not None:
250 self._final_estimator.fit(Xt, y, **fit_params)
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in _fit(self, X, y, **fit_params)
211 Xt, fitted_transformer = fit_transform_one_cached(
212 cloned_transformer, None, Xt, y,
--> 213 **fit_params_steps[name])
214 # Replace the transformer of the step with the fitted
215 # transformer. This is necessary when loading the transformer
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/externals/joblib/memory.pyc in __call__(self, *args, **kwargs)
360
361 def __call__(self, *args, **kwargs):
--> 362 return self.func(*args, **kwargs)
363
364 def call_and_shelve(self, *args, **kwargs):
/Users/emigre459/anaconda3/envs/ML_MiniProjects/lib/python2.7/site-packages/sklearn/pipeline.pyc in _fit_transform_one(transformer, weight, X, y, **fit_params)
579 **fit_params):
580 if hasattr(transformer, 'fit_transform'):
--> 581 res = transformer.fit_transform(X, y, **fit_params)
582 else:
583 res = transformer.fit(X, y, **fit_params).transform(X)
<ipython-input-108-dfcab4b62582> in fit_transform(self, X, y)
138 '''
139
--> 140 self.fit(X, y)
141 return self.transform(X)
<ipython-input-108-dfcab4b62582> in fit(self, X, y)
73 #Majority-negative features need to check df.quantile(1-q)
74 #in order to be using correct quantile value
---> 75 pos = np.percentile(X[:, self.feature_list], self.q * 100, axis = 0)
76 neg = np.percentile(X[:, self.feature_list], (1.0 - self.q) * 100, axis = 0)
77
IndexError: index 10 is out of bounds for axis 1 with size 10
knn_pipe_tuned = Pipeline([('impute', imp), ('engineer',topQ), ('scale', scaler),
('select', selector), ('kNN', knn_gs.best_estimator_)])
knn_pipe_tuned = Pipeline([('impute', imp), ('engineer',topQ), ('scale', scaler),
('select', selector), ('kNN', Pipeline([('impute', imp),
('engineer',topQ),
('scale', scaler),
('select', selector),
('kNN', knn)]))])
knn_pipe_tuned = knn_gs.best_estimator_