Python GridSearchCV - TypeError: an integer is required(需要一个整数)
我正在尝试使用网格搜索为我的SVM找到最佳超参数。按以下方式操作时:Python GridSearchCV-TypeError:需要整数,python,machine-learning,scikit-learn,svm,Python,Machine Learning,Scikit Learn,Svm,我正在尝试使用网格搜索为我的SVM找到最佳超参数。按以下方式操作时: from sklearn.model_selection import GridSearchCV param_grid = {'coef0': [10, 5, 0.5, 0.001], 'C': [100, 50, 1, 0.001]} poly_svm_search = SVC(kernel="poly", degree="2") grid_search = GridSearchCV(poly_svm_search, pa
from sklearn.model_selection import GridSearchCV
param_grid = {'coef0': [10, 5, 0.5, 0.001], 'C': [100, 50, 1, 0.001]}
poly_svm_search = SVC(kernel="poly", degree="2")
grid_search = GridSearchCV(poly_svm_search, param_grid, cv=5, scoring='f1')
grid_search.fit(train_data, train_labels)
我得到这个错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-72-dadf5782618c> in <module>
8
----> 9 grid_search.fit(train_data, train_labels)
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
981 # remaining jobs.
982 self._iterating = False
--> 983 if self.dispatch_one_batch(iterator):
984 self._iterating = self._original_iterator is not None
985
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
823 return False
824 else:
--> 825 self._dispatch(tasks)
826 return True
827
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
780 with self._lock:
781 job_idx = len(self._jobs)
--> 782 job = self._backend.apply_async(batch, callback=cb)
783 # A job can complete so quickly than its callback is
784 # called before we get here, causing self._jobs to
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
543 # Don't delay the application, to avoid keeping the input
544 # arguments in memory
--> 545 self.results = batch()
546
547 def get(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
526 estimator.fit(X_train, **fit_params)
527 else:
--> 528 estimator.fit(X_train, y_train, **fit_params)
529
530 except Exception as e:
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
210
211 seed = rnd.randint(np.iinfo('i').max)
--> 212 fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
213 # see comment on the other call to np.iinfo in this file
214
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
291 sample_weight, self.nu, self.cache_size, self.epsilon,
292 int(self.shrinking), int(self.probability), self.max_iter,
--> 293 random_seed)
294
295 self._warn_from_fit_status()
sklearn/svm/libsvm_sparse.pyx in sklearn.svm.libsvm_sparse.libsvm_sparse_train()
TypeError: an integer is required
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-72-dadf5782618c> in <module>
8
----> 9 grid_search.fit(train_data, train_labels)
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
981 # remaining jobs.
982 self._iterating = False
--> 983 if self.dispatch_one_batch(iterator):
984 self._iterating = self._original_iterator is not None
985
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
823 return False
824 else:
--> 825 self._dispatch(tasks)
826 return True
827
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
780 with self._lock:
781 job_idx = len(self._jobs)
--> 782 job = self._backend.apply_async(batch, callback=cb)
783 # A job can complete so quickly than its callback is
784 # called before we get here, causing self._jobs to
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
543 # Don't delay the application, to avoid keeping the input
544 # arguments in memory
--> 545 self.results = batch()
546
547 def get(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
526 estimator.fit(X_train, **fit_params)
527 else:
--> 528 estimator.fit(X_train, y_train, **fit_params)
529
530 except Exception as e:
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
210
211 seed = rnd.randint(np.iinfo('i').max)
--> 212 fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
213 # see comment on the other call to np.iinfo in this file
214
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
291 sample_weight, self.nu, self.cache_size, self.epsilon,
292 int(self.shrinking), int(self.probability), self.max_iter,
--> 293 random_seed)
294
295 self._warn_from_fit_status()
sklearn/svm/libsvm_sparse.pyx in sklearn.svm.libsvm_sparse.libsvm_sparse_train()
TypeError: an integer is required
我的 train_labels 变量包含一个布尔值列表,因此这是一个二分类问题。train_data 是一个稀疏矩阵,基本上包含了所有经过缩放和独热编码(one-hot)的特征。
我做错了什么?我很难找到问题所在。提前感谢您的帮助。

回答:您使用下面这一行初始化 SVC:
poly_svm_search = SVC(kernel="poly", degree="2")
由于 degree 参数的值两侧带有引号,您传给它的实际上是一个字符串。然而,degree 需要的是整数值。文档中的说明如下:
degree : int, optional (default=3)
多项式核函数('poly')的阶数。其他所有核函数都会忽略该参数。
因此,您需要这样做:
poly_svm_search = SVC(kernel="poly", degree=2)
请注意,这里我没有使用引号。

评论:训练数据或标签似乎有问题:能否尝试传入这两个稀疏矩阵的 .todense() 版本,看看会发生什么?如果因内存限制而不可行,可以先对矩阵进行切片。

评论:谢谢,您是对的。我想……