Python 糟糕的预测,但良好的模型准确性

Python 糟糕的预测,但良好的模型准确性,python,tensorflow,machine-learning,keras,Python,Tensorflow,Machine Learning,Keras,我试图在Tensorflow上构建我的第一个Keras深度神经网络,并希望使用Flask部署它。我拿了一份航空公司的数据样本,想预测航班是否会延误。首先,我只选取了样本列:Year、Month、DayOfWeek、UniqueCarrier、FlightNum、Origin、Dest、Distance。使用标签编码器将UniqueCarrier、Origin、Dest转换为数值。然后运行下面的程序后,发现准确率为 93%。但是当我通过 REST API 发送参数来手动运行预测时,我总是得到1作为输出。不确定需要做什么。

我试图在Tensorflow上构建我的第一个Keras深度神经网络,并希望使用Flask部署它。我拿了一份航空公司的数据样本,想预测航班是否会延误。首先,我只选取了样本列:
Year、Month、DayOfWeek、UniqueCarrier、FlightNum、Origin、Dest、Distance
。使用标签编码器将
UniqueCarrier、Origin、Dest
转换为数值。运行下面的程序后,模型的准确率约为 93%。但是,当我通过 REST API 发送参数来手动运行预测时,得到的输出始终是 1。我不确定该如何解决。

下面是一些代码和示例输出:

  le = LabelEncoder()

  data["UniqueCarrier"] = le.fit_transform(data["UniqueCarrier"])
  UniqueCarrier = list(le.classes_)
  print(UniqueCarrier)
  data["Origin"] = le.fit_transform(data["Origin"])
  Carrier = list(le.classes_)
  print(Carrier)
  data["Dest"] = le.fit_transform(data["Dest"])
  TailNum = list(le.classes_)
  print(TailNum)
将数据划分为预测变量(特征)和目标变量:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
删除目标变量:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
特征缩放:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
创建模型:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
编译模型:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
用于 TensorBoard 图表的日志:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
拟合(训练)模型:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
创建混淆矩阵:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)
通过参数进行预测测试:

rfDataOriginal = pd.DataFrame(data)
Delay_YesNo = rfDataOriginal['IsDepDelayed']
rfDataOriginal.drop(['IsDepDelayed'], axis=1, inplace=True)
print('Dimension reduced to:')
print(len(rfDataOriginal.columns))
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(15, input_dim=12, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
import keras
tbCallBack = keras.callbacks.TensorBoard(log_dir='/tmp/keras_logs',  write_graph=True)
model.fit(X_train, y_train, epochs=5, batch_size=30,  verbose=1, callbacks=[tbCallBack])
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, Y_pred)
print("\nConfusion Matrix:")
print(cm)
acs = accuracy_score(y_test, Y_pred)
print("\nAccuracy Score: %.2f%%" % (acs * 100))

Confusion Matrix:
[[41614   322]
[ 5664 35894]]

 Accuracy Score: 92.83%
inputFeature = [1989, 9, 14, 1719, 1720, 1845, 1859, 11, 927, 58, 68, 997]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

Output: array([[ 1.]], dtype=float32)

inputFeature = [1989, 11, 24, 1144, 1144, 1633, 1635, 0, 816, 213, 59, 1205]
inputFeature = np.asarray(inputFeature).reshape(1, 12)
model.predict(inputFeature)

array([[ 1.]], dtype=float32)

如果您直接执行预测(即不通过 REST API 进行预测),能得到正确的结果吗?看起来您遇到了类别不平衡的问题。每个类别各有多少个样本?
1
是多数类吗?这似乎是对的:所有航班都延误了。这完全符合我的经验。