Python 关于sklearn(超参数优化)的问题
标签:python, scikit-learn
我在学习随机森林模型和梯度提升模型,并编写了以下代码:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
# Base estimator; hyperparameters not listed in the search space keep their defaults.
rforest = RandomForestRegressor()

# Candidate values the randomized search samples from.
search_space = {
    'n_estimators': [100, 250, 500, 750, 1000, 1250, 1500],
    'min_samples_leaf': [1, 5, 10, 20, 25, 30, 40, 50],
    # np.arange excludes its stop value, so this covers 1 .. train_x.shape[1] - 1
    # (presumably train_x.shape[1] is the number of feature columns).
    'max_features': np.arange(1, train_x.shape[1]),
}

# 20 sampled candidates, each scored with 5-fold cross-validation.
# n_jobs=-1 requests parallel execution; random_state pins the sampling.
rforest_search = RandomizedSearchCV(
    rforest,
    search_space,
    cv=5,
    n_iter=20,
    return_train_score=False,
    n_jobs=-1,
    random_state=20,
)
rforest_search.fit(train_x, train_y)
print(
    'Best parameters for random forest:',
    rforest_search.best_params_,
    '\n',
)
但是发生了一个错误,这个错误在我拟合决策树模型时没有发生。错误是:
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-20: ordinal not in range(128)
我尝试使用sklearn的0.23.2和0.22.1版本,但问题仍然存在。我想知道为什么会发生这种情况,以及如何解决它
我用于决策树模型的代码是:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
# Base estimator with default hyperparameters.
d_tree = DecisionTreeRegressor()

# Candidate values the randomized search samples from; max_depth covers 1 .. 49.
tuning_parameters = {
    'min_samples_leaf': [1, 5, 10, 20, 25, 30],
    'max_depth': np.arange(1, 50),
}

# 20 sampled candidates, each scored with 5-fold cross-validation.
# n_jobs and random_state are not set for this search.
dt_estimation = RandomizedSearchCV(
    d_tree,
    tuning_parameters,
    n_iter=20,
    cv=5,
    return_train_score=False,
)

# The decision tree model is not sensitive to feature distribution or scaling,
# so the original (untransformed) data is used.
dt_estimation.fit(train_x, train_y)
print(
    "best_parms:{0}\nbest_score:{1}".format(
        dt_estimation.best_params_,
        dt_estimation.best_score_,
    )
)
整个错误消息如下所示:
UnicodeEncodeError Traceback (most recent call last)
<timed exec> in <module>
D:\python\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
667 verbose=self.verbose)
668 results = {}
--> 669 with parallel:
670 all_candidate_params = []
671 all_out = []
D:\python\lib\site-packages\joblib\parallel.py in __enter__(self)
709 def __enter__(self):
710 self._managed_backend = True
--> 711 self._initialize_backend()
712 return self
713
D:\python\lib\site-packages\joblib\parallel.py in _initialize_backend(self)
719 """Build a process or thread pool and return the number of workers"""
720 try:
--> 721 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
722 **self._backend_args)
723 if self.timeout is not None and not self._backend.supports_timeout:
D:\python\lib\site-packages\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, prefer, require, idle_worker_timeout, **memmappingexecutor_args)
492 SequentialBackend(nesting_level=self.nesting_level))
493
--> 494 self._workers = get_memmapping_executor(
495 n_jobs, timeout=idle_worker_timeout,
496 env=self._prepare_worker_env(n_jobs=n_jobs),
D:\python\lib\site-packages\joblib\executor.py in get_memmapping_executor(n_jobs, **kwargs)
18
19 def get_memmapping_executor(n_jobs, **kwargs):
---> 20 return MemmappingExecutor.get_memmapping_executor(n_jobs, **kwargs)
21
22
D:\python\lib\site-packages\joblib\executor.py in get_memmapping_executor(cls, n_jobs, timeout, initializer, initargs, env, temp_folder, context_id, **backend_args)
40 _executor_args = executor_args
41
---> 42 manager = TemporaryResourcesManager(temp_folder)
43
44 # reducers access the temporary folder in which to store temporary
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in __init__(self, temp_folder_root, context_id)
529 # exposes exposes too many low-level details.
530 context_id = uuid4().hex
--> 531 self.set_current_context(context_id)
532
533 def set_current_context(self, context_id):
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in set_current_context(self, context_id)
533 def set_current_context(self, context_id):
534 self._current_context_id = context_id
--> 535 self.register_new_context(context_id)
536
537 def register_new_context(self, context_id):
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in register_new_context(self, context_id)
558 new_folder_name, self._temp_folder_root
559 )
--> 560 self.register_folder_finalizer(new_folder_path, context_id)
561 self._cached_temp_folders[context_id] = new_folder_path
562
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in register_folder_finalizer(self, pool_subfolder, context_id)
588 # semaphores and pipes
589 pool_module_name = whichmodule(delete_folder, 'delete_folder')
--> 590 resource_tracker.register(pool_subfolder, "folder")
591
592 def _cleanup():
D:\python\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in register(self, name, rtype)
189 '''Register a named resource, and increment its refcount.'''
190 self.ensure_running()
--> 191 self._send('REGISTER', name, rtype)
192
193 def unregister(self, name, rtype):
D:\python\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
202
203 def _send(self, cmd, name, rtype):
--> 204 msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii')
205 if len(name) > 512:
206 # posix guarantees that writes to a pipe of less than PIPE_BUF
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-20: ordinal not in range(128)
UnicodeEncodeError Traceback (most recent call last)
<timed exec> in <module>
D:\python\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
667 verbose=self.verbose)
668 results = {}
--> 669 with parallel:
670 all_candidate_params = []
671 all_out = []
D:\python\lib\site-packages\joblib\parallel.py in __enter__(self)
709 def __enter__(self):
710 self._managed_backend = True
--> 711 self._initialize_backend()
712 return self
713
D:\python\lib\site-packages\joblib\parallel.py in _initialize_backend(self)
719 """Build a process or thread pool and return the number of workers"""
720 try:
--> 721 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
722 **self._backend_args)
723 if self.timeout is not None and not self._backend.supports_timeout:
D:\python\lib\site-packages\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, prefer, require, idle_worker_timeout, **memmappingexecutor_args)
492 SequentialBackend(nesting_level=self.nesting_level))
493
--> 494 self._workers = get_memmapping_executor(
495 n_jobs, timeout=idle_worker_timeout,
496 env=self._prepare_worker_env(n_jobs=n_jobs),
D:\python\lib\site-packages\joblib\executor.py in get_memmapping_executor(n_jobs, **kwargs)
18
19 def get_memmapping_executor(n_jobs, **kwargs):
---> 20 return MemmappingExecutor.get_memmapping_executor(n_jobs, **kwargs)
21
22
D:\python\lib\site-packages\joblib\executor.py in get_memmapping_executor(cls, n_jobs, timeout, initializer, initargs, env, temp_folder, context_id, **backend_args)
40 _executor_args = executor_args
41
---> 42 manager = TemporaryResourcesManager(temp_folder)
43
44 # reducers access the temporary folder in which to store temporary
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in __init__(self, temp_folder_root, context_id)
529 # exposes exposes too many low-level details.
530 context_id = uuid4().hex
--> 531 self.set_current_context(context_id)
532
533 def set_current_context(self, context_id):
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in set_current_context(self, context_id)
533 def set_current_context(self, context_id):
534 self._current_context_id = context_id
--> 535 self.register_new_context(context_id)
536
537 def register_new_context(self, context_id):
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in register_new_context(self, context_id)
558 new_folder_name, self._temp_folder_root
559 )
--> 560 self.register_folder_finalizer(new_folder_path, context_id)
561 self._cached_temp_folders[context_id] = new_folder_path
562
D:\python\lib\site-packages\joblib\_memmapping_reducer.py in register_folder_finalizer(self, pool_subfolder, context_id)
588 # semaphores and pipes
589 pool_module_name = whichmodule(delete_folder, 'delete_folder')
--> 590 resource_tracker.register(pool_subfolder, "folder")
591
592 def _cleanup():
D:\python\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in register(self, name, rtype)
189 '''Register a named resource, and increment its refcount.'''
190 self.ensure_running()
--> 191 self._send('REGISTER', name, rtype)
192
193 def unregister(self, name, rtype):
D:\python\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
202
203 def _send(self, cmd, name, rtype):
--> 204 msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii')
205 if len(name) > 512:
206 # posix guarantees that writes to a pipe of less than PIPE_BUF
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-20: ordinal not in range(128)
欢迎来到 SO!你能添加完整的错误信息吗(如果可能的话,请包括回溯)?
我已经添加了完整的错误信息。非常感谢。