Python 3.x 使用GridSearchCV和RandomizedSearchCV时出错
标签:python-3.x, scipy, scikit-learn, sparse-matrix, cross-validation
当尝试使用 GridSearchCV 或 RandomizedSearchCV 来拟合我的训练数据时,我不断遇到以下错误: TypeError: no supported conversion for types: (dtype('O'), dtype('O'))(不支持以下类型的转换)。以下是相关代码的示例:
from xgboost.sklearn import XGBRegressor as XGR
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
# Tune an XGBoost regressor with a randomized hyper-parameter search.
# X (features) and y (targets) are not defined in this snippet.
# NOTE(review): the traceback below reports dtype('O') while the search
# row-indexes the sparse X; X probably needs a numeric dtype first
# (e.g. X.astype('float64')) -- confirm against the real data.
xgbRegModel = XGR()
params = {
    'max_depth': [3, 6, 9],
    'learning_rate': [.05, .1, .5],
    'n_estimators': [50, 100, 200],
}
rscv = RandomizedSearchCV(xgbRegModel, params)
rscv.fit(X, y)
# The fitted search exposes the refit winner as `best_estimator_`;
# `best_model_` is not an attribute of RandomizedSearchCV.
rscv.best_estimator_
其中,X 是一个形状为 (39942, 112577) 的 scipy 稀疏矩阵(CSR 格式),y 是一个形状为 (39942,) 的数组。
这些数据的类型要么是 int64,要么是 float64。我已经试过在保留 np.nan 值的情况下运行,也试过先用 0 填充 np.nan 值再运行……(我原以为这可能是问题所在,但并不是。)
谁能告诉我这里发生了什么事?当我在不使用GridSearchCV或RandomizedSearchCV的情况下训练模型时,效果很好
任何想法都将不胜感激-谢谢
ps-错误的回溯确实很长,但如果有帮助的话,它就在这里
TypeError Traceback (most recent call last)
<ipython-input-54-63d54d4cd03e> in <module>()
3 xgbRegModel = XGR()
4 rscv = RandomizedSearchCV(xgbRegModel, params)
----> 5 rscv.fit(X, y)
6 rscv.best_model_
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
636 error_score=self.error_score)
637 for parameters, (train, test) in product(candidate_params,
--> 638 cv.split(X, y, groups)))
639
640 # if one choose to see train score, "out" will contain train score info
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
425 start_time = time.time()
426
--> 427 X_train, y_train = _safe_split(estimator, X, y, train)
428 X_test, y_test = _safe_split(estimator, X, y, test, train)
429
~\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
198 X_subset = X[np.ix_(indices, train_indices)]
199 else:
--> 200 X_subset = safe_indexing(X, indices)
201
202 if y is not None:
~\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
160 return X.take(indices, axis=0)
161 else:
--> 162 return X[indices]
163 else:
164 return [X[idx] for idx in indices]
~\Anaconda3\lib\site-packages\scipy\sparse\csr.py in __getitem__(self, key)
315 if isintlike(col) or isinstance(col,slice):
316 P = extractor(row, self.shape[0]) # [[1,2],j] or [[1,2],1:2]
--> 317 extracted = P * self
318 if col == slice(None, None, None):
319 return extracted
~\Anaconda3\lib\site-packages\scipy\sparse\base.py in __mul__(self, other)
367 if self.shape[1] != other.shape[0]:
368 raise ValueError('dimension mismatch')
--> 369 return self._mul_sparse_matrix(other)
370
371 # If it's a list or whatever, treat it like a matrix
~\Anaconda3\lib\site-packages\scipy\sparse\compressed.py in _mul_sparse_matrix(self, other)
539 indptr = np.asarray(indptr, dtype=idx_dtype)
540 indices = np.empty(nnz, dtype=idx_dtype)
--> 541 data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
542
543 fn = getattr(_sparsetools, self.format + '_matmat_pass2')
~\Anaconda3\lib\site-packages\scipy\sparse\sputils.py in upcast(*args)
49 return t
50
---> 51 raise TypeError('no supported conversion for types: %r' % (args,))
52
53
TypeError: no supported conversion for types: (dtype('O'), dtype('O'))
TypeError Traceback (most recent call last)
<ipython-input-54-63d54d4cd03e> in <module>()
3 xgbRegModel = XGR()
4 rscv = RandomizedSearchCV(xgbRegModel, params)
----> 5 rscv.fit(X, y)
6 rscv.best_model_
~\Anaconda3\lib\site packages\sklearn\model\u selection\\u search.py in fit(self、X、y、groups、**fit\u参数)
636错误分数=自我错误分数)
637对于产品中的参数(训练、测试)(候选参数,
-->638 cv.分割(X、y、组)
639
640#如果选择查看列车分数,“out”将包含列车分数信息
调用中的~\Anaconda3\lib\site packages\sklearn\externals\joblib\parallel.py(self,iterable)
777#被派遣。特别是,这覆盖了边缘
778#与耗尽迭代器一起使用的并行情况。
-->779自调度一批时(迭代器):
780自迭代=真
781其他:
~\Anaconda3\lib\site packages\sklearn\externals\joblib\parallel.py在dispatch\u one\u批处理中(self,迭代器)
623返回错误
624其他:
-->625自我派遣(任务)
626返回真值
627
~\Anaconda3\lib\site packages\sklearn\externals\joblib\parallel.py in\u dispatch(self,batch)
586 dispatch_timestamp=time.time()
587 cb=BatchCompletionCallBack(调度时间戳,len(批处理),self)
-->588 job=self.\u backend.apply\u async(批处理,回调=cb)
589 self.\u jobs.append(作业)
590
~\Anaconda3\lib\site packages\sklearn\externals\joblib\\u parallel\u backends.py in apply\u async(self、func、callback)
109 def apply_async(self、func、callback=None):
110“计划要运行的func”
-->111结果=立即结果(func)
112如果回调:
113回调(结果)
~\Anaconda3\lib\site packages\sklearn\externals\joblib\\u parallel\u backends.py in\uuuuuuuu init\uuuu(self,batch)
330#不要延迟应用程序,以免保留输入
331#内存中的参数
-->332 self.results=batch()
333
334 def get(自我):
~\Anaconda3\lib\site packages\sklearn\externals\joblib\parallel.py in\uuuuu调用(self)
129
130 def呼叫(自我):
-->131返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
132
133定义长度(自):
~\Anaconda3\lib\site packages\sklearn\externals\joblib\parallel.py in(.0)
129
130 def呼叫(自我):
-->131返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
132
133定义长度(自):
~\Anaconda3\lib\site packages\sklearn\model\u selection\\u validation.py in\u fit\u和\u score(估计器、X、y、记分器、训练、测试、详细、参数、拟合参数、返回训练分数、返回参数、返回测试样本、返回次数、错误分数)
425开始时间=时间。时间()
426
-->427 X_序列,y_序列=_安全分割(估算器,X,y,序列)
428 X_测试,y_测试=_安全分割(估计器,X,y,测试,列车)
429
~\Anaconda3\lib\site packages\sklearn\utils\metaestimators.py在安全分割中(估计器,X,y,指数,序列指数)
198 X_子集=X[np.ix_(指数,序列指数)]
199其他:
-->200 X_子集=安全索引(X,索引)
201
202如果y不是无:
安全索引中的~\Anaconda3\lib\site packages\sklearn\utils\\uuuuu init\uuuuuu.py(X,索引)
160返回X.take(指数,轴=0)
161其他:
-->162返回X[指数]
163其他:
164返回[X[idx]用于索引中的idx]
~\Anaconda3\lib\site packages\scipy\sparse\csr.py in\uuuuu getitem\uuuuuuu(self,key)
315如果isintlike(列)或isinstance(列,切片):
316 P=提取器(行,自形[0])#[[1,2],j]或[[1,2],1:2]
-->317提取=P*self
318如果列==切片(无,无,无):
319返回提取
~\Anaconda3\lib\site packages\scipy\sparse\base.py in\uuuuuu mul\uuuuu(self,other)
367如果自我塑造[1]!=其他。形状[0]:
368提升值错误(“维度不匹配”)
-->369返回自多稀疏矩阵(其他)
370
371#如果它是一个列表或其他什么,请将其视为矩阵
矩阵中的~\Anaconda3\lib\site packages\scipy\sparse\compressed.py(自身、其他)
539 indptr=np.asarray(indptr,dtype=idx\u dtype)
540索引=np.空(nnz,dtype=idx_dtype)
-->541 data=np.empty(nnz,dtype=upcast(self.dtype,other.dtype))
542
543 fn=getattr(_sparsetools,self.format+'_matmatmat_pass2')
~\Anaconda3\lib\site-packages\scipy\sparse\sputils.py in upcast(*args)
49 return t
50
---> 51 raise TypeError('no supported conversion for types: %r' % (args,))
52
53
TypeError: no supported conversion for types: (dtype('O'), dtype('O'))
那就是
X : array-like, shape = [n_samples, n_features]