Python sklearn df问题 - Field Cady示例代码问题
标签:python, scikit-learn, sklearn-pandas。我正在阅读Field Cady的《数据科学手册》,这里有示例代码。我从代码的第23行得到语法错误,即:
File "<ipython-input-4-02028cc326e3>", line 2
X, Y = df[df.columns[:3]], (df['species']=='virginica') X_train, X_test,
Y_train, Y_test = train_test_split(X, Y, test_size=.8)
^
SyntaxError: invalid syntax
您必须首先加载 `iris` 数据集。以下是更新后的代码:
# Compare several scikit-learn classifiers on the iris data set by drawing
# one ROC curve per model for the binary task "is the species virginica?".
from matplotlib import pyplot as plt
import sklearn
from sklearn.metrics import roc_curve, auc
try:
    # train_test_split lives in model_selection in current scikit-learn.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module path.
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
import pandas as pd

# Build a DataFrame with one column per feature plus a categorical
# 'species' column holding the class name of every row.
data = load_iris()
df = pd.DataFrame(data['data'], columns=data['feature_names'])
# The loaded dataset is bound to `data`; no variable named `iris` exists.
df['species'] = pd.Categorical.from_codes(
    data['target'], data['target_names'])

# name -> (line format, classifier)
CLASS_MAP = {
    'LogisticRegression':
        ('-', LogisticRegression()),
    'Naive Bayes': ('--', GaussianNB()),
    'Decision Tree':
        ('.-', DecisionTreeClassifier(max_depth=5)),
    'Random Forest':
        (':', RandomForestClassifier(
            max_depth=5, n_estimators=10,
            max_features=1)),
}

# Divide cols by independent/dependent, rows by test/train.
# These are two separate statements and must stay on separate lines.
X, Y = df[df.columns[:3]], (df['species'] == 'virginica')
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.8)

for name, (line_fmt, model) in CLASS_MAP.items():
    model.fit(X_train, Y_train)
    # array w one col per label
    preds = model.predict_proba(X_test)
    # Column 1 is the probability of the positive (True) class.
    pred = pd.Series(preds[:, 1])
    fpr, tpr, thresholds = roc_curve(Y_test, pred)
    auc_score = auc(fpr, tpr)
    label = '%s: auc=%f' % (name, auc_score)
    plt.plot(fpr, tpr, line_fmt,
             linewidth=5, label=label)

# Figure-level decoration happens once, after every curve is drawn.
plt.legend(loc="lower right")
plt.title('Comparing Classifiers')
plt.plot([0, 1], [0, 1], 'k--')  # x=y line. Visual aid
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()
`X_train` 之前缺少一个逗号。
非常感谢,这给了我新的错误……不确定它在评论中是否显示得很好,^ 指向第一个 = 符号:X, Y = df[df.columns[:3]], (df['species']=='virginica'), X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.8) ^ SyntaxError: can't assign to comparison(语法错误:无法对比较表达式赋值)。请发布代码,而不仅仅是错误信息,这样我们才能给你更好的指导。谢谢你,萨默拉,我也已将它添加到原始帖子的底部。
# Compare several scikit-learn classifiers on the iris data set by drawing
# one ROC curve per model for the binary task "is the species virginica?".
from matplotlib import pyplot as plt
import sklearn
from sklearn.metrics import roc_curve, auc
try:
    # train_test_split lives in model_selection in current scikit-learn.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module path.
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
import pandas as pd

# Build a DataFrame with one column per feature plus a categorical
# 'species' column holding the class name of every row.
data = load_iris()
df = pd.DataFrame(data['data'], columns=data['feature_names'])
# The loaded dataset is bound to `data`; no variable named `iris` exists.
df['species'] = pd.Categorical.from_codes(
    data['target'], data['target_names'])

# name -> (line format, classifier)
CLASS_MAP = {
    'LogisticRegression':
        ('-', LogisticRegression()),
    'Naive Bayes': ('--', GaussianNB()),
    'Decision Tree':
        ('.-', DecisionTreeClassifier(max_depth=5)),
    'Random Forest':
        (':', RandomForestClassifier(
            max_depth=5, n_estimators=10,
            max_features=1)),
}

# Divide cols by independent/dependent, rows by test/train.
# These are two separate statements and must stay on separate lines.
X, Y = df[df.columns[:3]], (df['species'] == 'virginica')
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.8)

for name, (line_fmt, model) in CLASS_MAP.items():
    model.fit(X_train, Y_train)
    # array w one col per label
    preds = model.predict_proba(X_test)
    # Column 1 is the probability of the positive (True) class.
    pred = pd.Series(preds[:, 1])
    fpr, tpr, thresholds = roc_curve(Y_test, pred)
    auc_score = auc(fpr, tpr)
    label = '%s: auc=%f' % (name, auc_score)
    plt.plot(fpr, tpr, line_fmt,
             linewidth=5, label=label)

# Figure-level decoration happens once, after every curve is drawn.
plt.legend(loc="lower right")
plt.title('Comparing Classifiers')
plt.plot([0, 1], [0, 1], 'k--')  # x=y line. Visual aid
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()