Python sklearn FeatureUnion赢得';你不能用交叉评分法吗?

Python sklearn FeatureUnion赢得';你不能用交叉评分法吗?,python,scikit-learn,Python,Scikit Learn,我正在玩FeatureUnion和Pipeline,但是被这个用例卡住了 # simply return a column in a Pandas DataFrame class ItemSelector(BaseEstimator, TransformerMixin): def __init__(self, col): self.col = col def fit(self, x, y=None): return self def transform(self,

我正在玩
FeatureUnion
Pipeline
,但是被这个用例卡住了

# simply return a column in a Pandas DataFrame
class ItemSelector(BaseEstimator, TransformerMixin):
  def __init__(self, col):
    self.col = col

  def fit(self, x, y=None):
    return self

  def transform(self, x):
    return x[self.col]  # return a column


# convert categorical features into one-hot encoding format
class CategoricalEncoder(BaseEstimator, TransformerMixin):
  def __init__(self):
    self.lb = LabelBinarizer()

  def fit(self, x, y=None):
    self.lb.fit(x)
    return self

  def transform(self, x):
    rez = self.lb.transform(x)
    return rez


# This dummy one just combines the above 2 transformers into one for convenience reasons
class DummyEncoder(BaseEstimator, TransformerMixin):
  def __init__(self, col):
    # here is a feature union inside which a Pipeline used,
    # first, select a column, then one-hot encode the column
    self.union = FeatureUnion([('one', Pipeline([('select', ItemSelector(col)), ('encode', CategoricalEncoder())]))])

  def fit(self, x, y=None):
    self.union.fit(x)
    return self

  def transform(self, x):
    return self.union.transform(x)

# alright, here is the testing code
df = pd.DataFrame(data={'Y': [1,2,1,2,1], 'X': ['a','b','a','b','c']})
pipe_conf = [
      ('union', FeatureUnion([('union_0', DummyEncoder('X'))])),
      ('clf', LogisticRegression())
    ]
pipe = Pipeline(pipe_conf)

# 1) call fit and predict explicitly, it works
pipe.fit(df, df['Y'])
pipe.predict(df)

# 2) via sklearn cross validation, error, lots of error which I'm just not able to understand at all
cross_val_score(pipe, df, df['Y'], cv=2)
我上面的代码有错误吗?请给我一个提示

错误如下:

Traceback (most recent call last):
  File "a.py", line 65, in <module>
    cross_val_score(pipe, df, df['Y'], cv=2)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 140, in cross_val_score
    for train, test in cv.split(X, y, groups))
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
    while self.dispatch_one_batch(iterator):
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
    result = ImmediateResult(func)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
    self.results = batch()
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 238, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 268, in fit
    Xt, fit_params = self._fit(X, y, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 234, in _fit
    Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 737, in fit_transform
    for name, trans, weight in self._iter())
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
    while self.dispatch_one_batch(iterator):
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
    result = ImmediateResult(func)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
    self.results = batch()
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 580, in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/base.py", line 497, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
  File "a.py", line 38, in fit
    self.union.fit(x)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 712, in fit
    for _, trans, _ in self._iter())
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
    while self.dispatch_one_batch(iterator):
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
    result = ImmediateResult(func)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
    self.results = batch()
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 566, in _fit_one_transformer
    return transformer.fit(X, y)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 268, in fit
    Xt, fit_params = self._fit(X, y, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 234, in _fit
    Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/base.py", line 494, in fit_transform
    return self.fit(X, **fit_params).transform(X)
  File "a.py", line 19, in transform
    return x[self.col]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py", line 1992, in __getitem__
    return self._getitem_column(key)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py", line 1999, in _getitem_column
    return self._get_item_cache(key)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/generic.py", line 1345, in _get_item_cache
    values = self._data.get(item)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/internals.py", line 3234, in get
    raise ValueError("cannot label index with a null key")
ValueError: cannot label index with a null key
回溯(最近一次呼叫最后一次):
文件“a.py”,第65行,在
交叉评分(管道、df、df['Y'],cv=2)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/model_selection/_validation.py”,第140行,在cross_val_分数中
对于列车,在等速分段(X、y、组)中进行试验
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第758行,在调用中__
而self.dispatch\u一批(迭代器):
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第608行,分批发送
自我分配(任务)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第571行,在
作业=self.\u后端.apply\u异步(批处理,回调=cb)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/\u parallel\u backends.py”,第109行,在apply\u async中
结果=立即结果(func)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/_parallel_backends.py”,第322行,在初始化中__
self.results=batch()
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第131行,在调用中__
返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第131行,在
返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/model_selection/_validation.py”,第238行,在_fit_和_score中
估计值拟合(X_序列、y_序列、**拟合参数)
文件“/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py”,第268行,适合
Xt,拟合参数=自拟合(X,y,**拟合参数)
文件“/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py”,第234行,in_-fit
Xt=transform.fit_transform(Xt,y,**fit_参数_步骤[名称])
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/pipeline.py”,第737行,在fit_转换中
用于名称、运输、自身重量。_iter())
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第758行,在调用中__
而self.dispatch\u一批(迭代器):
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第608行,分批发送
自我分配(任务)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第571行,在
作业=self.\u后端.apply\u异步(批处理,回调=cb)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/\u parallel\u backends.py”,第109行,在apply\u async中
结果=立即结果(func)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/_parallel_backends.py”,第322行,在初始化中__
self.results=batch()
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第131行,在调用中__
返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第131行,在
返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/pipeline.py”,第580行,在fit\u transform\u one中
res=变换器。拟合变换(X,y,**拟合参数)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/base.py”,第497行,在fit_转换中
返回self.fit(X,y,**fit_参数).transform(X)
文件“a.py”,第38行,适合
自联合拟合(x)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/pipeline.py”,第712行,适合
对于u,trans,u,in self._iter())
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第758行,在调用中__
而self.dispatch\u一批(迭代器):
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第608行,分批发送
自我分配(任务)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第571行,在
作业=self.\u后端.apply\u异步(批处理,回调=cb)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/\u parallel\u backends.py”,第109行,在apply\u async中
结果=立即结果(func)
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/_parallel_backends.py”,第322行,在初始化中__
self.results=batch()
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第131行,在调用中__
返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/externals/joblib/parallel.py”,第131行,在
返回[func(*args,**kwargs),用于self.items中的func、args、kwargs]
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/pipeline.py”,第566行,在一个变压器中
回路变压器。安装(X,y)
文件“/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py”,第268行,适合
Xt,拟合参数=自拟合(X,y,**拟合参数)
文件“/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py”,第234行,in_-fit
Xt=transform.fit_transform(Xt,y,**fit_参数_步骤[名称])
文件“/Users/home/miniconda3/lib/python3.5/site packages/sklearn/base.py”,第494行,在fit_转换中
返回self.fit(X,**fit_参数).transform(X)
文件“a.py”,第19行,在转换中
返回x[self.col]
文件“/用户/home/minic