Python:如果 ColumnTransformer 没有 get_feature_names 属性,如何获取 SelectKBest() 选中的列?

Python 如果ColumnTransformer没有get_feature_names属性,如何从SelectKBest()获取选定列?,python,python-3.x,scikit-learn,Python,Python 3.x,Scikit Learn,我想知道使用SelectKBest()选中了哪些特征。我首先应用了ColumnTransformer(),然后应用了SelectKBest():我现在很难弄清楚选中了哪些特征。我知道skb.get_support()和ct.get_feature_names(),但是ct.get_feature_names()给了我 AttributeError: Transformer scaling (type MinMaxScaler) does not provide get_feature_names.

我想知道
SelectKBest()
选中了哪些特征。我首先应用了
ColumnTransformer()

然后应用了
SelectKBest()

我现在很难弄清楚选中了哪些特征。我知道可以使用
skb.get_support()
和
ct.get_feature_names()
,但是
ct.get_feature_names()
会报错:

AttributeError: Transformer scaling (type MinMaxScaler) does not provide get_feature_names.

对于您的情况,可以先检查每个转换器是否具有
get_feature_names
方法:如果有,就调用它得到输出列名;否则直接使用该转换器的原始输入列名。把这些列名依次收集到一个列表中:

import itertools

def _output_names(fitted, columns):
    """Return the output column names of one entry of ``ct.transformers_``.

    ``fitted`` is the fitted step (or the special string 'drop' /
    'passthrough') and ``columns`` is the column selection it was given.
    """
    # A dropped step contributes no columns to the transformed matrix.
    if isinstance(fitted, str) and fitted == "drop":
        return []
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2 in favour of get_feature_names_out(); probe the new name first so
    # encoders such as OneHotEncoder still report their expanded columns.
    for method_name in ("get_feature_names_out", "get_feature_names"):
        method = getattr(fitted, method_name, None)
        if method is not None:
            return list(method())
    # Steps exposing neither method (and the 'passthrough' marker) keep
    # their input columns, so the selection itself is the name list.
    return columns


# Flatten the per-step names in the order ColumnTransformer lists its steps,
# so that position i matches column i of the transformed matrix.
cols = list(itertools.chain.from_iterable(
    _output_names(fitted, columns) for _, fitted, columns in ct.transformers_
))
然后根据
SelectKBest
的
get_support()
方法返回的布尔掩码筛选这些列名:

from itertools import compress

# Pair every column name with its SelectKBest mask flag and keep the selected ones.
[name for name, keep in zip(cols, skb.get_support()) if keep]
完整可复制示例

import random
import itertools
import pandas as pd
from itertools import compress
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler


# Build a toy frame: the numeric iris measurements plus one random categorical column
data = load_iris()
feature_names = data['feature_names']
y = data['target']
X = pd.DataFrame(data['data'], columns=feature_names)
locations = ['NY', 'Texas', 'Boston']
X['some_location'] = [random.choice(locations) for _ in range(len(X))]

# Apply the column transformers: scale the numeric columns, one-hot encode
# the categorical one.
num_features = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
cat_features = ['some_location']

# OneHotEncoder's dense-output switch was renamed from `sparse` to
# `sparse_output` in scikit-learn 1.2 and the old keyword was removed in 1.4.
# Try the current keyword first and fall back for older releases.
try:
    onehot = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    onehot = OneHotEncoder(sparse=False, handle_unknown='ignore')

ct = ColumnTransformer(
    [
        ("scaling", MinMaxScaler(), num_features),
        ("onehot", onehot, cat_features),
    ],
    remainder='passthrough',  # columns not listed above are passed through unchanged
)
X_train_trans = ct.fit_transform(X)

# Get the column names of the transformed matrix
def _output_names(fitted, columns):
    """Return the output column names of one entry of ``ct.transformers_``.

    ``fitted`` is the fitted step (or the special string 'drop' /
    'passthrough') and ``columns`` is the column selection it was given.
    """
    # A dropped step contributes no columns to the transformed matrix.
    if isinstance(fitted, str) and fitted == "drop":
        return []
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2 in favour of get_feature_names_out(); probe the new name first so
    # encoders such as OneHotEncoder still report their expanded columns.
    for method_name in ("get_feature_names_out", "get_feature_names"):
        method = getattr(fitted, method_name, None)
        if method is not None:
            return list(method())
    # Steps exposing neither method (and the 'passthrough' marker) keep
    # their input columns, so the selection itself is the name list.
    return columns


# Flatten the per-step names in the order ColumnTransformer lists its steps,
# so that position i matches column i of X_train_trans.
# NOTE: the one-hot column prefix depends on the scikit-learn version
# ('x0_<category>' from get_feature_names(), '<column>_<category>' from
# get_feature_names_out() when fitted on a DataFrame).
cols = list(itertools.chain.from_iterable(
    _output_names(fitted, columns) for _, fitted, columns in ct.transformers_
))
cols
>>>
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)',
 'x0_Boston',
 'x0_NY',
 'x0_Texas']

# Score every transformed column with chi2 and keep the 4 best
skb = SelectKBest(score_func=chi2, k=4)
X_train_trans_select = skb.fit(X_train_trans, y).transform(X_train_trans)

# Pair every column name with its SelectKBest mask flag and keep the selected ones
[name for name, keep in zip(cols, skb.get_support()) if keep]
>>>
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']
from itertools import compress

# Pair every column name with its SelectKBest mask flag and keep the selected ones.
[name for name, keep in zip(cols, skb.get_support()) if keep]
import random
import itertools
import pandas as pd
from itertools import compress
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler


# Build a toy frame: the numeric iris measurements plus one random categorical column
data = load_iris()
feature_names = data['feature_names']
y = data['target']
X = pd.DataFrame(data['data'], columns=feature_names)
locations = ['NY', 'Texas', 'Boston']
X['some_location'] = [random.choice(locations) for _ in range(len(X))]

# Apply the column transformers: scale the numeric columns, one-hot encode
# the categorical one.
num_features = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
cat_features = ['some_location']

# OneHotEncoder's dense-output switch was renamed from `sparse` to
# `sparse_output` in scikit-learn 1.2 and the old keyword was removed in 1.4.
# Try the current keyword first and fall back for older releases.
try:
    onehot = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    onehot = OneHotEncoder(sparse=False, handle_unknown='ignore')

ct = ColumnTransformer(
    [
        ("scaling", MinMaxScaler(), num_features),
        ("onehot", onehot, cat_features),
    ],
    remainder='passthrough',  # columns not listed above are passed through unchanged
)
X_train_trans = ct.fit_transform(X)

# Get the column names of the transformed matrix
def _output_names(fitted, columns):
    """Return the output column names of one entry of ``ct.transformers_``.

    ``fitted`` is the fitted step (or the special string 'drop' /
    'passthrough') and ``columns`` is the column selection it was given.
    """
    # A dropped step contributes no columns to the transformed matrix.
    if isinstance(fitted, str) and fitted == "drop":
        return []
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2 in favour of get_feature_names_out(); probe the new name first so
    # encoders such as OneHotEncoder still report their expanded columns.
    for method_name in ("get_feature_names_out", "get_feature_names"):
        method = getattr(fitted, method_name, None)
        if method is not None:
            return list(method())
    # Steps exposing neither method (and the 'passthrough' marker) keep
    # their input columns, so the selection itself is the name list.
    return columns


# Flatten the per-step names in the order ColumnTransformer lists its steps,
# so that position i matches column i of X_train_trans.
# NOTE: the one-hot column prefix depends on the scikit-learn version
# ('x0_<category>' from get_feature_names(), '<column>_<category>' from
# get_feature_names_out() when fitted on a DataFrame).
cols = list(itertools.chain.from_iterable(
    _output_names(fitted, columns) for _, fitted, columns in ct.transformers_
))
cols
>>>
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)',
 'x0_Boston',
 'x0_NY',
 'x0_Texas']

# Score every transformed column with chi2 and keep the 4 best
skb = SelectKBest(score_func=chi2, k=4)
X_train_trans_select = skb.fit(X_train_trans, y).transform(X_train_trans)

# Pair every column name with its SelectKBest mask flag and keep the selected ones
[name for name, keep in zip(cols, skb.get_support()) if keep]
>>>
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']