Python GridSearchCV-TypeError:需要整数

Python GridSearchCV-TypeError:需要整数,python,machine-learning,scikit-learn,svm,Python,Machine Learning,Scikit Learn,Svm,我正在尝试使用网格搜索为我的SVM找到最佳超参数。按以下方式操作时: from sklearn.model_selection import GridSearchCV param_grid = {'coef0': [10, 5, 0.5, 0.001], 'C': [100, 50, 1, 0.001]} poly_svm_search = SVC(kernel="poly", degree="2") grid_search = GridSearchCV(poly_svm_search, pa

我正在尝试使用网格搜索为我的SVM找到最佳超参数。按以下方式操作时:

from sklearn.model_selection import GridSearchCV

param_grid = {'coef0': [10, 5, 0.5, 0.001], 'C': [100, 50, 1, 0.001]}
poly_svm_search = SVC(kernel="poly", degree="2")
grid_search = GridSearchCV(poly_svm_search, param_grid, cv=5, scoring='f1')

grid_search.fit(train_data, train_labels)
我得到这个错误:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-72-dadf5782618c> in <module>
      8 
----> 9 grid_search.fit(train_data, train_labels)

~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
    720                 return results_container[0]
    721 
--> 722             self._run_search(evaluate_candidates)
    723 
    724         results = results_container[0]

~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
   1189     def _run_search(self, evaluate_candidates):
   1190         """Search all candidates in param_grid"""
-> 1191         evaluate_candidates(ParameterGrid(self.param_grid))
   1192 
   1193 

~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
    709                                for parameters, (train, test)
    710                                in product(candidate_params,
--> 711                                           cv.split(X, y, groups)))
    712 
    713                 all_candidate_params.extend(candidate_params)

~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    981             # remaining jobs.
    982             self._iterating = False
--> 983             if self.dispatch_one_batch(iterator):
    984                 self._iterating = self._original_iterator is not None
    985 

~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    823                 return False
    824             else:
--> 825                 self._dispatch(tasks)
    826                 return True
    827 

~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    780         with self._lock:
    781             job_idx = len(self._jobs)
--> 782             job = self._backend.apply_async(batch, callback=cb)
    783             # A job can complete so quickly than its callback is
    784             # called before we get here, causing self._jobs to

~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    543         # Don't delay the application, to avoid keeping the input
    544         # arguments in memory
--> 545         self.results = batch()
    546 
    547     def get(self):

~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
    259         with parallel_backend(self._backend):
    260             return [func(*args, **kwargs)
--> 261                     for func, args, kwargs in self.items]
    262 
    263     def __len__(self):

~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    259         with parallel_backend(self._backend):
    260             return [func(*args, **kwargs)
--> 261                     for func, args, kwargs in self.items]
    262 
    263     def __len__(self):

~/.local/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    526             estimator.fit(X_train, **fit_params)
    527         else:
--> 528             estimator.fit(X_train, y_train, **fit_params)
    529 
    530     except Exception as e:

~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
    210 
    211         seed = rnd.randint(np.iinfo('i').max)
--> 212         fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
    213         # see comment on the other call to np.iinfo in this file
    214 

~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
    291                 sample_weight, self.nu, self.cache_size, self.epsilon,
    292                 int(self.shrinking), int(self.probability), self.max_iter,
--> 293                 random_seed)
    294 
    295         self._warn_from_fit_status()

sklearn/svm/libsvm_sparse.pyx in sklearn.svm.libsvm_sparse.libsvm_sparse_train()

TypeError: an integer is required
---------------------------------------------------------------------------
TypeError回溯(最近一次调用上次)
在里面
8.
---->9网格搜索。拟合(列数据、列标签)
~/.local/lib/python3.6/site-packages/sklearn/model\u selection//u search.py in fit(self、X、y、groups、**fit\u参数)
720返回结果\u容器[0]
721
-->722自我搜索(评估候选人)
723
724结果=结果\u容器[0]
~/.local/lib/python3.6/site-packages/sklearn/model\u selection//u search.py in\u run\u search(自我评估候选人)
1189定义-运行-搜索(自我评估-候选):
1190“搜索参数网格中的所有候选项”
->1191评估候选参数(参数网格(self.param网格))
1192
1193
~/.local/lib/python3.6/site-packages/sklearn/model\u selection//u search.py in evaluate\u候选者(候选者参数)
709参数(列车、试验)
710英寸产品(候选参数,
-->711 cv.分割(X、y、组)
712
713所有候选参数扩展(候选参数)
调用中的~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py(self,iterable)
981#剩余工作。
982自迭代=错误
-->983如果自行调度一批(迭代器):
984 self.\u iterating=self.\u original\u iterator不是None
985
调度批处理中的~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py(self,迭代器)
823返回错误
824其他:
-->825自我调度(任务)
826返回真值
827
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in_dispatch(self,batch)
780带自锁:
781作业idx=len(自作业)
-->782作业=self.\u后端.apply\u异步(批处理,回调=cb)
783#一个作业完成的速度比它的回调速度要快
784#在我们到达这里之前打电话给self.#u jobs
异步应用中的~/.local/lib/python3.6/site-packages/sklearn/externals/joblib//\u parallel\u backends.py(self、func、callback)
180 def apply_async(self、func、callback=None):
181“计划要运行的func”
-->182结果=立即结果(func)
183如果回调:
184回调(结果)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib//u parallel\u backends.py in\uuuuu init\uuuuu(self,batch)
543#不要延迟应用程序,以免保留输入
544#内存中的参数
-->545 self.results=batch()
546
547 def get(自我):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in\uuuu调用(self)
259与并行_后端(自。_后端):
260返回[func(*args,**kwargs)
-->261用于自身项目中的func、ARG、kwargs]
262
263定义长度(自):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in(.0)
259与并行_后端(自。_后端):
260返回[func(*args,**kwargs)
-->261用于自身项目中的func、ARG、kwargs]
262
263定义长度(自):
~/.local/lib/python3.6/site-packages/sklearn/model\u selection//u validation.py in\u fit\u and\u score(估计器、X、y、记分器、训练、测试、冗余、参数、拟合参数、返回训练分数、返回参数、返回测试样本、返回次数、返回估计器、错误分数)
526估算器拟合(X列,**拟合参数)
527其他:
-->528估算器拟合(X_序列、y_序列、**拟合参数)
529
530例外情况除外,如e:
适合的~/.local/lib/python3.6/site-packages/sklearn/svm/base.py(自我、X、y、样本重量)
210
211 seed=rnd.randint(np.iinfo('i').max)
-->212拟合(X,y,样本权重,解算器类型,核,随机种子=种子)
213#请参阅此文件中对np.iinfo的另一个调用的注释
214
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in\u sparse\u fit(self,X,y,sample\u weight,solver\u type,kernel,random\u seed)
291样本重量,self.nu,self.cache,self.epsilon,
292 int(自收缩)、int(自概率)、self.max_iter、,
-->293(随机种子)
294
295自我警告状态()
sklearn.svm.libsvm_sparse.libsvm_sparse_train()中的sklearn/svm/libsvm_sparse.pyx
TypeError:需要一个整数
我的 train_labels 变量包含一个布尔值列表,因此这是一个二分类问题。train_data 是一个稀疏矩阵(从回溯中的 _sparse_fit 调用可以看出),基本上包含所有经过缩放和独热编码(one-hot)的特征。


我做错了什么?我很难找到问题所在。先在此感谢您的帮助。

使用此行初始化SVC时:

poly_svm_search = SVC(kernel="poly", degree="2")
由于 degree 参数的值两边加了引号,您传给它的实际上是一个字符串。但是,degree 需要的是整数值。文档中的说明如下:

degree:int,可选(默认值=3)。多项式核函数('poly')的阶数。其他所有核函数都会忽略该参数。

因此,您需要这样做:

poly_svm_search = SVC(kernel="poly", degree=2)

请注意,我在这里没有给 2 加引号。

训练数据或标签中似乎有问题:能否尝试传递这两个稀疏矩阵的
.todense()
版本,看看会发生什么?如果由于内存限制而不可行,您可以先对矩阵进行切片。

谢谢,您是对的。我想