Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/344.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
XGBoost 特征名称错误-Python_Python_Xgboost - Fatal编程技术网

XGBoost 特征名称错误-Python

XGBoost 特征名称错误-Python,python,xgboost,Python,Xgboost,也许这个问题已经以不同的形式被问过很多次了。但是，我的问题是，当我对类似生产环境的数据使用XGBClassifier()时，会出现特征名称不匹配错误。我希望有人能告诉我我做错了什么。这是我的代码。顺便说一句，数据完全是虚构的： import pandas as pd from sklearn.preprocessing import LabelEncoder, OneHotEncoder from sklearn.model_selection import train_test_split,

也许这个问题已经以不同的形式被问过很多次了。但是，我的问题是，当我对类似生产环境的数据使用
XGBClassifier()
时，会出现特征名称不匹配（feature_names mismatch）错误。我希望有人能告诉我我做错了什么。这是我的代码。顺便说一句，数据完全是虚构的：

# Toy end-to-end example: integer-encode and one-hot encode the two
# categorical feature columns, then fit an XGBClassifier and report its
# accuracy on a hold-out split.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score
import xgboost as xgb

data = {"Age":[44,27,30,38,40,35,70,48,50,37],
        "BMI":["25-29","35-39","30-35","40-45","45-49","20-25","<19",">70","50-55","55-59"],
        "BP":["<140/90",">140/90",">140/90",">140/90","<140/90","<140/90","<140/90",">140/90",">140/90","<140/90"],
        "Risk":["No","Yes","Yes","Yes","No","No","No","Yes","Yes","No"]}

df = pd.DataFrame(data)

# Copy the feature slice: the encoding below writes into X, and writing into
# a slice of df is the chained-assignment pattern pandas warns about
# (SettingWithCopyWarning). With the copy, df itself is left unchanged.
X = df.iloc[:, :-1].copy()
y = df.iloc[:, -1]

# Encoder for the target column only.
labelencoder = LabelEncoder()

# Fitted LabelEncoder per encoded feature column, keyed by column position.
# Keeping them is what allows the same string -> integer mapping to be applied
# to new rows later with .transform() instead of refitting.
label_encoders = {}

def encoder_X(columns):
    """Replace string categories in the given columns of X with integer codes.

    columns: iterable of positional column indices of X to encode.
    A separate LabelEncoder is fitted for each column and stored in
    label_encoders under that column's position.
    """
    for i in columns:
        name = X.columns[i]
        column_encoder = LabelEncoder()
        X[name] = column_encoder.fit_transform(X[name])
        label_encoders[i] = column_encoder

encoder_X([1,2])

y = labelencoder.fit_transform(y)

# Only columns 1 and 2 (BMI, BP) are one-hot encoded; a flat list of column
# indices is the documented form of this argument.
# NOTE: `categorical_features` was deprecated in scikit-learn 0.20 and removed
# in 0.22; on newer versions select the columns with ColumnTransformer instead.
onehotencdoer = OneHotEncoder(categorical_features = [1, 2])
X = onehotencdoer.fit_transform(X).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 13)

model = xgb.XGBClassifier()
model.fit(X_train, y_train, verbose = True)

y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: {0}%".format(accuracy*100))

似乎编码器在拟合（fit）之后需要保存下来。我使用了
sklearn
中的
joblib
。Jason 给了我保存编码器的想法。以下为编辑后的版本：

# Toy end-to-end example: integer-encode and one-hot encode the two
# categorical feature columns, persist the fitted one-hot encoder to disk,
# then fit an XGBClassifier and report its accuracy on a hold-out split.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score
# NOTE: `sklearn.externals.joblib` was removed in scikit-learn 0.23; on newer
# versions use the standalone `import joblib` instead.
from sklearn.externals import joblib
import xgboost as xgb

data = {"Age":[44,27,30,38,40,35,70,48,50,37],
        "BMI":["25-29","35-39","30-35","40-45","45-49","20-25","<19",">70","50-55","55-59"],
        "BP":["<140/90",">140/90",">140/90",">140/90","<140/90","<140/90","<140/90",">140/90",">140/90","<140/90"],
        "Risk":["No","Yes","Yes","Yes","No","No","No","Yes","Yes","No"]}

df = pd.DataFrame(data)

# Copy the feature slice: the encoding below writes into X, and writing into
# a slice of df is the chained-assignment pattern pandas warns about
# (SettingWithCopyWarning). With the copy, df itself is left unchanged.
X = df.iloc[:, :-1].copy()
y = df.iloc[:, -1]

# Encoder for the target column only.
labelencoder = LabelEncoder()

# Fitted LabelEncoder per encoded feature column, keyed by column position.
# Keeping them is what allows the same string -> integer mapping to be applied
# to new rows later with .transform() instead of refitting.
label_encoders = {}

def encoder_X(columns):
    """Replace string categories in the given columns of X with integer codes.

    columns: iterable of positional column indices of X to encode.
    A separate LabelEncoder is fitted for each column and stored in
    label_encoders under that column's position.
    """
    for i in columns:
        name = X.columns[i]
        column_encoder = LabelEncoder()
        X[name] = column_encoder.fit_transform(X[name])
        label_encoders[i] = column_encoder

encoder_X([1,2])

y = labelencoder.fit_transform(y)

# Only columns 1 and 2 (BMI, BP) are one-hot encoded; a flat list of column
# indices is the documented form of this argument.
# NOTE: `categorical_features` was deprecated in scikit-learn 0.20 and removed
# in 0.22; on newer versions select the columns with ColumnTransformer instead.
onehotencdoer = OneHotEncoder(categorical_features = [1, 2])
onehotencdoer.fit(X)
# Save the fitted encoder so new data can be expanded to the same columns.
# joblib.dump returns the list of file names it wrote, not the encoder.
enc = joblib.dump(onehotencdoer, "encoder.pkl")
X = onehotencdoer.transform(X).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 13)

model = xgb.XGBClassifier()
model.fit(X_train, y_train, verbose = True)

y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: {0}%".format(accuracy*100))
将熊猫作为pd导入
从sklearn.preprocessing导入LabelEncoder,OneHotEncoder
从sklearn.model_选择导入训练测试_分割、KFold、cross_val_分数
从sklearn.metrics导入准确性\u分数
从sklearn.externals导入作业库
将xgboost作为xgb导入
数据={“年龄”:[44,27,30,38,40,35,70,48,50,37],
“BMI”:[“25-29”、“35-39”、“30-35”、“40-45”、“45-49”、“20-25”、“70”、“50-55”、“55-59”],
“BP”:[“140/90”、“140/90”、“140/90”、“140/90”]
prod_df=pd.DataFrame(proddata)
def编码器产品(列):
对于列中的i:
prod_df.iloc[:,i]=labelencoder.fit_变换(prod_df.iloc[:,i])
编码器_prod([1,2])
enc=joblib.load(“encoder.pkl”)
prod_df=enc.transform(prod_df).toarray()
预测=模型预测(产品df)
结果=[预测中val的四舍五入(val)]
它似乎对这个例子有效,我将在更大的数据集上尝试这种方法。 请告诉我你的想法


谢谢

能否请您发布堆栈跟踪，以便我们知道代码中的错误出现在哪里。@UpasanaMittal-当然。我将编辑我的问题以显示错误发生的位置。它就出现在 model.predict(prod_df) 这一行。尽管如此，我可能已经找到了答案，并已在今天早些时候发布。我只是在等待更多的反馈。谢谢，我把这项技术应用到工作中的一个数据集上，它成功了。
predictions = model.predict(prod_df)
Traceback (most recent call last):

  File "<ipython-input-24-456b5626e711>", line 1, in <module>
    predictions = model.predict(prod_df)

  File "c:\users\sozdemir\appdata\local\programs\python\python35\lib\site-packages\xgboost\sklearn.py", line 526, in predict
    ntree_limit=ntree_limit)

  File "c:\users\sozdemir\appdata\local\programs\python\python35\lib\site-packages\xgboost\core.py", line 1044, in predict
    self._validate_features(data)

  File "c:\users\sozdemir\appdata\local\programs\python\python35\lib\site-packages\xgboost\core.py", line 1288, in _validate_features
    data.feature_names))

ValueError: feature_names mismatch: ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12'] ['f0', 'f1', 'f2', 'f3', 'f4', 'f5']
expected f6, f11, f12, f9, f7, f8, f10 in input data
xgboost.__version__
Out[37]: '0.7'
# Toy end-to-end example: integer-encode and one-hot encode the two
# categorical feature columns, persist the fitted one-hot encoder, fit an
# XGBClassifier, then score new "production" rows using the SAME fitted
# encoders so the feature columns line up with what the model was trained on.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score
# NOTE: `sklearn.externals.joblib` was removed in scikit-learn 0.23; on newer
# versions use the standalone `import joblib` instead.
from sklearn.externals import joblib
import xgboost as xgb

data = {"Age":[44,27,30,38,40,35,70,48,50,37],
        "BMI":["25-29","35-39","30-35","40-45","45-49","20-25","<19",">70","50-55","55-59"],
        "BP":["<140/90",">140/90",">140/90",">140/90","<140/90","<140/90","<140/90",">140/90",">140/90","<140/90"],
        "Risk":["No","Yes","Yes","Yes","No","No","No","Yes","Yes","No"]}

df = pd.DataFrame(data)

# Copy the feature slice: the encoding below writes into X, and writing into
# a slice of df is the chained-assignment pattern pandas warns about
# (SettingWithCopyWarning). With the copy, df itself is left unchanged.
X = df.iloc[:, :-1].copy()
y = df.iloc[:, -1]

# Encoder for the target column only.
labelencoder = LabelEncoder()

# Fitted LabelEncoder per encoded feature column, keyed by column position.
# encoder_prod() reuses these so production rows get the training mapping.
# They live only in memory here; persist them as well (e.g. with joblib) if
# scoring happens in a separate process.
label_encoders = {}

def encoder_X(columns):
    """Replace string categories in the given columns of X with integer codes.

    columns: iterable of positional column indices of X to encode.
    A separate LabelEncoder is fitted for each column and stored in
    label_encoders under that column's position.
    """
    for i in columns:
        name = X.columns[i]
        column_encoder = LabelEncoder()
        X[name] = column_encoder.fit_transform(X[name])
        label_encoders[i] = column_encoder

encoder_X([1,2])

y = labelencoder.fit_transform(y)

# Only columns 1 and 2 (BMI, BP) are one-hot encoded; a flat list of column
# indices is the documented form of this argument.
# NOTE: `categorical_features` was deprecated in scikit-learn 0.20 and removed
# in 0.22; on newer versions select the columns with ColumnTransformer instead.
onehotencdoer = OneHotEncoder(categorical_features = [1, 2])
onehotencdoer.fit(X)
# Save the fitted encoder so production data is expanded to the same columns.
joblib.dump(onehotencdoer, "encoder.pkl")
X = onehotencdoer.transform(X).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 13)

model = xgb.XGBClassifier()
model.fit(X_train, y_train, verbose = True)

y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: {0}%".format(accuracy*100))
proddata = {"Age":[65,50,37],
        "BMI":["25-29","35-39","30-35"],
        "BP":["<140/90",">140/90",">140/90"]}

prod_df = pd.DataFrame(proddata)

def encoder_prod(columns):
    """Integer-encode the given columns of prod_df with the training encoders.

    columns: iterable of positional column indices of prod_df to encode; each
    must have been encoded by encoder_X() first.

    Uses .transform(), never .fit_transform(): refitting on the production
    rows would assign integer codes from whatever categories happen to be
    present there, so the codes would no longer mean the same thing as in
    training. LabelEncoder.transform raises ValueError for a category that
    was not seen during fitting.
    """
    for i in columns:
        name = prod_df.columns[i]
        prod_df[name] = label_encoders[i].transform(prod_df[name])

encoder_prod([1,2])
# Reload the one-hot encoder fitted on the training data; only transform here.
enc = joblib.load("encoder.pkl")
prod_df = enc.transform(prod_df).toarray()

predictions = model.predict(prod_df)
results = [round(val) for val in predictions]