Python Sklearn Pipeline/ColumnTransformer 抛出 ValueError: too many values to unpack (expected 2)
标签:python, machine-learning, scikit-learn, data-science
我正在尝试通过 ColumnTransformer 分别运行各个 Pipeline。我的数据框名为 listings。所有特征都是 float 或 int 类型。数据框是干净的——唯一的问题是有一些缺失值被标记为 NaN,因此应该由 SimpleImputer 来处理。
Sklearn 提供了关于通过 ColumnTransformer 运行 Pipeline 的文档,我就是按照该文档做的。
首先,我使用 Pipeline 创建各个管道:
# Preprocessing pipelines, one per feature group. Every Pipeline step must be
# a 2-tuple of (step_name, estimator): the traceback shows Pipeline._fit
# unpacking each entry with `for name, step in self.steps`.

# Numeric features: median imputation followed by standardisation.
num_pipeline = Pipeline([
('num_imputer', SimpleImputer(strategy='median')),
('num_scaler', StandardScaler()),
])
# Discrete features: most-frequent imputation followed by standardisation.
disc_pipeline = Pipeline([
('disc_imputer', SimpleImputer(strategy='most_frequent')),
# BUG: this step is a 3-tuple. The extra `disc_attribs` element is what makes
# Pipeline raise "ValueError: too many values to unpack (expected 2)".
# Column selections belong in the ColumnTransformer entry, not in a Pipeline
# step; the step should read: ('disc_scaler', StandardScaler()),
('disc_scaler', StandardScaler(), disc_attribs),
])
# Categorical features: most-frequent imputation, then one-hot encoding with
# the first category dropped and a dense (non-sparse) output.
cat_pipeline = Pipeline([
('cat_imputer', SimpleImputer(strategy='most_frequent')),
('cat_ohe', OneHotEncoder(categories='auto', drop='first',
sparse=False)),
])
# "amen" features: most-frequent imputation only.
amen_pipeline= Pipeline([
('amen_imputer', SimpleImputer(strategy='most_frequent')),
])
然后,我通过ColumnTransformer运行它们:
# Combine the per-group pipelines. Unlike Pipeline steps, ColumnTransformer
# entries are 3-tuples of (name, transformer, columns).
# The *_attribs names are defined outside this snippet; presumably they are
# the column lists for each feature group -- TODO confirm.
listings_pipeline = ColumnTransformer([
('num', num_pipeline, num_attribs),
('disc', disc_pipeline, disc_attribs),
('cat', cat_pipeline, cat_attribs),
('amen', amen_pipeline, amen_attribs),
])
# NOTE(review): the traceback shows this call being made with
# `listings_explore_pipeline`, not `listings_explore` -- confirm which
# DataFrame is intended.
X_train = listings_pipeline.fit_transform(listings_explore)
以下是错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-511-9d5100fe0f5d> in <module>
12 ('amen', amen_pipeline, amen_attribs),
13 ])
---> 14 X_train = listings_pipeline.fit_transform(listings_explore_pipeline)
~/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
466 self._validate_remainder(X)
467
--> 468 result = self._fit_transform(X, y, _fit_transform_one)
469
470 if not result:
~/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
410 message=self._log_message(name, idx, len(transformers)))
411 for idx, (name, trans, column, weight) in enumerate(
--> 412 self._iter(fitted=fitted, replace_strings=True), 1))
413 except ValueError as e:
414 if "Expected 2D array, got 1D array instead" in str(e):
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
922 self._iterating = self._original_iterator is not None
923
--> 924 while self.dispatch_one_batch(iterator):
925 pass
926
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
~/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
714 with _print_elapsed_time(message_clsname, message):
715 if hasattr(transformer, 'fit_transform'):
--> 716 res = transformer.fit_transform(X, y, **fit_params)
717 else:
718 res = transformer.fit(X, y, **fit_params).transform(X)
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params)
385 """
386 last_step = self._final_estimator
--> 387 Xt, fit_params = self._fit(X, y, **fit_params)
388 with _print_elapsed_time('Pipeline',
389 self._log_message(len(self.steps) - 1)):
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
270 fit_transform_one_cached = memory.cache(_fit_transform_one)
271
--> 272 fit_params_steps = {name: {} for name, step in self.steps
273 if step is not None}
274 for pname, pval in fit_params.items():
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in <dictcomp>(.0)
270 fit_transform_one_cached = memory.cache(_fit_transform_one)
271
--> 272 fit_params_steps = {name: {} for name, step in self.steps
273 if step is not None}
274 for pname, pval in fit_params.items():
ValueError: too many values to unpack (expected 2)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-511-9d5100fe0f5d> in <module>
12 ('amen', amen_pipeline, amen_attribs),
13 ])
---> 14 X_train = listings_pipeline.fit_transform(listings_explore_pipeline)
~/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
466 self._validate_remainder(X)
467
--> 468 result = self._fit_transform(X, y, _fit_transform_one)
469
470 if not result:
~/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
410 message=self._log_message(name, idx, len(transformers)))
411 for idx, (name, trans, column, weight) in enumerate(
--> 412 self._iter(fitted=fitted, replace_strings=True), 1))
413 except ValueError as e:
414 if "Expected 2D array, got 1D array instead" in str(e):
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
922 self._iterating = self._original_iterator is not None
923
--> 924 while self.dispatch_one_batch(iterator):
925 pass
926
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
~/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
714 with _print_elapsed_time(message_clsname, message):
715 if hasattr(transformer, 'fit_transform'):
--> 716 res = transformer.fit_transform(X, y, **fit_params)
717 else:
718 res = transformer.fit(X, y, **fit_params).transform(X)
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params)
385 """
386 last_step = self._final_estimator
--> 387 Xt, fit_params = self._fit(X, y, **fit_params)
388 with _print_elapsed_time('Pipeline',
389 self._log_message(len(self.steps) - 1)):
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
270 fit_transform_one_cached = memory.cache(_fit_transform_one)
271
--> 272 fit_params_steps = {name: {} for name, step in self.steps
273 if step is not None}
274 for pname, pval in fit_params.items():
~/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in <dictcomp>(.0)
270 fit_transform_one_cached = memory.cache(_fit_transform_one)
271
--> 272 fit_params_steps = {name: {} for name, step in self.steps
273 if step is not None}
274 for pname, pval in fit_params.items():
ValueError: too many values to unpack (expected 2)
为什么这不起作用?