Python 3.x 使用GridSearchCV和RandomizedSearchCV时出错

Python 3.x 使用GridSearchCV和RandomizedSearchCV时出错,python-3.x,scipy,scikit-learn,sparse-matrix,cross-validation,Python 3.x,Scipy,Scikit Learn,Sparse Matrix,Cross Validation,当尝试使用 GridSearchCV 或 RandomizedSearchCV 来拟合我的训练数据时,我不断遇到以下错误: TypeError: no supported conversion for types: (dtype('O'), dtype('O')) 以下是相关代码的示例: from xgboost.sklearn import XGBRegressor as XGR from sklearn.model_selection import RandomizedSearchCV, GridSearchCV xgbRegModel = XGR() params = {'max_depth':[3, 6,

当尝试使用 GridSearchCV 或 RandomizedSearchCV 来拟合我的训练数据时,我不断遇到以下错误:

TypeError: no supported conversion for types: (dtype('O'), dtype('O'))

以下是相关代码的示例:

from xgboost.sklearn import XGBRegressor as XGR
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

xgbRegModel = XGR()
params = {'max_depth':[3, 6, 9], 'learning_rate':[.05, .1, .5], 'n_estimators': [50, 100, 200]}

rscv = RandomizedSearchCV(xgbRegModel, params)  
rscv.fit(X, y)  
rscv.best_model_
其中,X 是一个形状为 (39942, 112577) 的 scipy.sparse CSR 稀疏矩阵,y 是一个形状为 (39942,) 的数组。

这些数据的类型要么是 int64,要么是 float64。我已经试过保留 np.nan 值直接运行,也试过先用 0 填充 np.nan 值再运行……(我原以为这可能是问题所在,但并不是。)

谁能告诉我这里发生了什么事?当我在不使用GridSearchCV或RandomizedSearchCV的情况下训练模型时,效果很好

任何想法都将不胜感激-谢谢

ps-错误的回溯确实很长,但如果有帮助的话,它就在这里

TypeError                                 Traceback (most recent call last)
<ipython-input-54-63d54d4cd03e> in <module>()
      3 xgbRegModel = XGR()
      4 rscv = RandomizedSearchCV(xgbRegModel, params)
----> 5 rscv.fit(X, y)
      6 rscv.best_model_

~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    636                                   error_score=self.error_score)
    637           for parameters, (train, test) in product(candidate_params,
--> 638                                                    cv.split(X, y, groups)))
    639 
    640         # if one choose to see train score, "out" will contain train  score info

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
    333 
    334     def get(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in     <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    425     start_time = time.time()
    426 
--> 427     X_train, y_train = _safe_split(estimator, X, y, train)
    428     X_test, y_test = _safe_split(estimator, X, y, test, train)
    429 

~\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
    198             X_subset = X[np.ix_(indices, train_indices)]
    199     else:
--> 200         X_subset = safe_indexing(X, indices)
    201 
    202     if y is not None:

~\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
    160             return X.take(indices, axis=0)
    161         else:
--> 162             return X[indices]
    163     else:
    164         return [X[idx] for idx in indices]

~\Anaconda3\lib\site-packages\scipy\sparse\csr.py in __getitem__(self, key)
    315             if isintlike(col) or isinstance(col,slice):
    316                 P = extractor(row, self.shape[0])     # [[1,2],j] or [[1,2],1:2]
--> 317                 extracted = P * self
    318                 if col == slice(None, None, None):
    319                     return extracted

~\Anaconda3\lib\site-packages\scipy\sparse\base.py in __mul__(self, other)
    367             if self.shape[1] != other.shape[0]:
    368                 raise ValueError('dimension mismatch')
--> 369             return self._mul_sparse_matrix(other)
    370 
    371         # If it's a list or whatever, treat it like a matrix

~\Anaconda3\lib\site-packages\scipy\sparse\compressed.py in _mul_sparse_matrix(self, other)
    539         indptr = np.asarray(indptr, dtype=idx_dtype)
    540         indices = np.empty(nnz, dtype=idx_dtype)
--> 541         data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
    542 
    543         fn = getattr(_sparsetools, self.format + '_matmat_pass2')

~\Anaconda3\lib\site-packages\scipy\sparse\sputils.py in upcast(*args)
     49             return t
     50 
---> 51     raise TypeError('no supported conversion for types: %r' % (args,))
     52 
     53 

TypeError: no supported conversion for types: (dtype('O'), dtype('O'))
TypeError                                 Traceback (most recent call last)
<ipython-input-54-63d54d4cd03e> in <module>()
      3 xgbRegModel = XGR()
      4 rscv = RandomizedSearchCV(xgbRegModel, params)
----> 5 rscv.fit(X, y)
      6 rscv.best_model_

~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    636                                   error_score=self.error_score)
    637           for parameters, (train, test) in product(candidate_params,
--> 638                                                    cv.split(X, y, groups)))
    639 
    640         # if one choose to see train score, "out" will contain train  score info

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590 

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
    333 
    334     def get(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in     <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    425     start_time = time.time()
    426 
--> 427     X_train, y_train = _safe_split(estimator, X, y, train)
    428     X_test, y_test = _safe_split(estimator, X, y, test, train)
    429 

~\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
    198             X_subset = X[np.ix_(indices, train_indices)]
    199     else:
--> 200         X_subset = safe_indexing(X, indices)
    201 
    202     if y is not None:

~\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
    160             return X.take(indices, axis=0)
    161         else:
--> 162             return X[indices]
    163     else:
    164         return [X[idx] for idx in indices]

~\Anaconda3\lib\site-packages\scipy\sparse\csr.py in __getitem__(self, key)
    315             if isintlike(col) or isinstance(col,slice):
    316                 P = extractor(row, self.shape[0])     # [[1,2],j] or [[1,2],1:2]
--> 317                 extracted = P * self
    318                 if col == slice(None, None, None):
    319                     return extracted

~\Anaconda3\lib\site-packages\scipy\sparse\base.py in __mul__(self, other)
    367             if self.shape[1] != other.shape[0]:
    368                 raise ValueError('dimension mismatch')
--> 369             return self._mul_sparse_matrix(other)
    370 
    371         # If it's a list or whatever, treat it like a matrix

~\Anaconda3\lib\site-packages\scipy\sparse\compressed.py in _mul_sparse_matrix(self, other)
    539         indptr = np.asarray(indptr, dtype=idx_dtype)
    540         indices = np.empty(nnz, dtype=idx_dtype)
--> 541         data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
    542 
    543         fn = getattr(_sparsetools, self.format + '_matmat_pass2')

~\Anaconda3\lib\site-packages\scipy\sparse\sputils.py in upcast(*args)
     49             return t
     50 
---> 51     raise TypeError('no supported conversion for types: %r' % (args,))
     52 
     53 

TypeError: no supported conversion for types: (dtype('O'), dtype('O'))
那就是
X : array-like, shape = [n_samples, n_features]