Python 3.x 使用GridSearchCV和RandomizedSearchCV时出错_Python 3.x_Scipy_Scikit Learn_Sparse Matrix_Cross Validation

Python 3.x 使用GridSearchCV和RandomizedSearchCV时出错

python-3.x scikit-learn

Python 3.x 使用GridSearchCV和RandomizedSearchCV时出错,python-3.x,scipy,scikit-learn,sparse-matrix,cross-validation,Python 3.x,Scipy,Scikit Learn,Sparse Matrix,Cross Validation,当尝试使用GridSearchCV或RandomizedSearchCV来拟合我的训练数据时，我不断遇到以下错误： TypeError: no supported conversion for types: (dtype('O'), dtype('O')) 以下是相关代码的示例： from xgboost.sklearn import XGBRegressor as XGR from sklearn.model_selection import RandomizedSearchCV, GridSearchCV xgbRegModel = XGR() params = {'max_depth':[3, 6,

当尝试使用GridSearchCV或RandomizedSearchCV来拟合我的训练数据时，我不断遇到以下错误：

TypeError: no supported conversion for types: (dtype('O'), dtype('O'))

以下是相关代码的示例：

from xgboost.sklearn import XGBRegressor as XGR
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

xgbRegModel = XGR()
params = {'max_depth':[3, 6, 9], 'learning_rate':[.05, .1, .5], 'n_estimators': [50, 100, 200]}

rscv = RandomizedSearchCV(xgbRegModel, params)  
rscv.fit(X, y)  
rscv.best_model_

其中，X 是一个形状为 (39942, 112577) 的 scipy 稀疏 CSR 矩阵，y 是一个形状为 (39942,) 的数组。

数据的类型要么是 int64，要么是 float64。我已经尝试过保留 np.nan 值直接运行，也尝试过用 0 填充 np.nan 值之后再运行……（我原以为这可能是问题所在，但并不是。）

谁能告诉我这里发生了什么事？当我在不使用GridSearchCV或RandomizedSearchCV的情况下训练模型时，效果很好

任何想法都将不胜感激-谢谢

ps-错误的回溯确实很长，但如果有帮助的话，它就在这里

TypeError                                 Traceback (most recent call last)
<ipython-input-54-63d54d4cd03e> in <module>()
      3 xgbRegModel = XGR()
      4 rscv = RandomizedSearchCV(xgbRegModel, params)
----> 5 rscv.fit(X, y)
      6 rscv.best_model_

~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    636                                   error_score=self.error_score)
    637           for parameters, (train, test) in product(candidate_params,
--> 638                                                    cv.split(X, y, groups)))
    639 
    640         # if one choose to see train score, "out" will contain train  score info

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
    333 
    334     def get(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in     <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    425     start_time = time.time()
    426 
--> 427     X_train, y_train = _safe_split(estimator, X, y, train)
    428     X_test, y_test = _safe_split(estimator, X, y, test, train)
    429 

~\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
    198             X_subset = X[np.ix_(indices, train_indices)]
    199     else:
--> 200         X_subset = safe_indexing(X, indices)
    201 
    202     if y is not None:

~\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
    160             return X.take(indices, axis=0)
    161         else:
--> 162             return X[indices]
    163     else:
    164         return [X[idx] for idx in indices]

~\Anaconda3\lib\site-packages\scipy\sparse\csr.py in __getitem__(self, key)
    315             if isintlike(col) or isinstance(col,slice):
    316                 P = extractor(row, self.shape[0])     # [[1,2],j] or [[1,2],1:2]
--> 317                 extracted = P * self
    318                 if col == slice(None, None, None):
    319                     return extracted

~\Anaconda3\lib\site-packages\scipy\sparse\base.py in __mul__(self, other)
    367             if self.shape[1] != other.shape[0]:
    368                 raise ValueError('dimension mismatch')
--> 369             return self._mul_sparse_matrix(other)
    370 
    371         # If it's a list or whatever, treat it like a matrix

~\Anaconda3\lib\site-packages\scipy\sparse\compressed.py in _mul_sparse_matrix(self, other)
    539         indptr = np.asarray(indptr, dtype=idx_dtype)
    540         indices = np.empty(nnz, dtype=idx_dtype)
--> 541         data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
    542 
    543         fn = getattr(_sparsetools, self.format + '_matmat_pass2')

~\Anaconda3\lib\site-packages\scipy\sparse\sputils.py in upcast(*args)
     49             return t
     50 
---> 51     raise TypeError('no supported conversion for types: %r' % (args,))
     52 
     53 

TypeError: no supported conversion for types: (dtype('O'), dtype('O'))

TypeError                                 Traceback (most recent call last)
<ipython-input-54-63d54d4cd03e> in <module>()
      3 xgbRegModel = XGR()
      4 rscv = RandomizedSearchCV(xgbRegModel, params)
----> 5 rscv.fit(X, y)
      6 rscv.best_model_

~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    636                                   error_score=self.error_score)
    637           for parameters, (train, test) in product(candidate_params,
--> 638                                                    cv.split(X, y, groups)))
    639 
    640         # if one choose to see train score, "out" will contain train  score info

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
    333 
    334     def get(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in     <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    425     start_time = time.time()
    426 
--> 427     X_train, y_train = _safe_split(estimator, X, y, train)
    428     X_test, y_test = _safe_split(estimator, X, y, test, train)
    429 

~\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
    198             X_subset = X[np.ix_(indices, train_indices)]
    199     else:
--> 200         X_subset = safe_indexing(X, indices)
    201 
    202     if y is not None:

~\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
    160             return X.take(indices, axis=0)
    161         else:
--> 162             return X[indices]
    163     else:
    164         return [X[idx] for idx in indices]

~\Anaconda3\lib\site-packages\scipy\sparse\csr.py in __getitem__(self, key)
    315             if isintlike(col) or isinstance(col,slice):
    316                 P = extractor(row, self.shape[0])     # [[1,2],j] or [[1,2],1:2]
--> 317                 extracted = P * self
    318                 if col == slice(None, None, None):
    319                     return extracted

~\Anaconda3\lib\site-packages\scipy\sparse\base.py in __mul__(self, other)
    367             if self.shape[1] != other.shape[0]:
    368                 raise ValueError('dimension mismatch')
--> 369             return self._mul_sparse_matrix(other)
    370 
    371         # If it's a list or whatever, treat it like a matrix

~\Anaconda3\lib\site-packages\scipy\sparse\compressed.py in _mul_sparse_matrix(self, other)
    539         indptr = np.asarray(indptr, dtype=idx_dtype)
    540         indices = np.empty(nnz, dtype=idx_dtype)
--> 541         data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
    542 
    543         fn = getattr(_sparsetools, self.format + '_matmat_pass2')

~\Anaconda3\lib\site-packages\scipy\sparse\sputils.py in upcast(*args)
     49             return t
     50 
---> 51     raise TypeError('no supported conversion for types: %r' % (args,))
     52 
     53 

TypeError: no supported conversion for types: (dtype('O'), dtype('O'))

那就是

X : array-like, shape = [n_samples, n_features]