Tensorflow 使用TF Keras ImageDataGenerator时,预测返回相同的值
我正在使用Tensorflow 使用TF Keras ImageDataGenerator时,预测返回相同的值,tensorflow,tensorflow2.0,tf.keras,Tensorflow,Tensorflow2.0,Tf.keras,我正在使用猫狗数据集来训练使用Tensorflow Keras的模型,并使用ImageDataGenerator.flow_从_目录读取文件 训练和验证的准确性是不错的,但当尝试对测试数据进行预测时,模型预测的是所有图像的同一类别 培训代码如下所示: import os, shutil import tensorflow as tf from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
猫狗数据集
来训练使用Tensorflow Keras
的模型,并使用ImageDataGenerator.flow_从_目录
读取文件
训练和验证的准确性是不错的,但当尝试对测试数据进行预测时,模型预测的是所有图像的同一类别
培训代码如下所示:
import os, shutil
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
# Path to Training Directory
train_dir = 'Dogs_Vs_Cats_Small/train'
# Path to Validation Directory
validation_dir = 'Dogs_Vs_Cats_Small/validation'
#### Create the Convolutional Base
Max_Pool_Size = (2,2)
model = Sequential([
Conv2D(input_shape = (150, 150, 3), filters = 32, kernel_size = (3,3), activation = 'relu',
padding = 'valid', data_format = 'channels_last'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 64, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size)
])
#### Define the Dense Layers on Top of Convolutional Base
model.add(Flatten())
model.add(Dense(units = 512, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))
model.summary()
model.compile(optimizer = RMSprop(learning_rate = 0.001), loss = 'binary_crossentropy', metrics = 'acc')
Train_Gen = ImageDataGenerator(1./255)
Val_Gen = ImageDataGenerator(1./255)
Train_Generator = Train_Gen.flow_from_directory(train_dir, target_size = (150,150), batch_size = 20,
class_mode = 'binary')
Val_Generator = Val_Gen.flow_from_directory(validation_dir, target_size = (150, 150), class_mode = 'binary',
batch_size = 20)
batch_size = 20
target_size = (150,150)
No_Of_Training_Images = Train_Generator.classes.shape[0]
No_Of_Val_Images = Val_Generator.classes.shape[0]
steps_per_epoch = No_Of_Training_Images/batch_size
validation_steps = No_Of_Val_Images/batch_size
history = model.fit(x = Train_Generator, shuffle=True, epochs = 20,
steps_per_epoch = steps_per_epoch,
validation_data = Val_Generator
, validation_steps = validation_steps
)
Test_Dir = 'Dogs_Vs_Cats_Very_Small/test'
Test_Generator = ImageDataGenerator(1./255).flow_from_directory(Test_Dir,
target_size = (150,150), batch_size = 1,
shuffle = False, class_mode = 'binary') # This outputs Found 17 images belonging to 2 classes.
No_Of_Samples = len(Test_Generator.filenames)
testPredictions = model.predict(Test_Generator, steps = No_Of_Samples)
predictedClassIndices=np.argmax(testPredictions,axis=1)
print(predictedClassIndices)
filenames = Test_Generator.filenames
for f in range(len(filenames)):
print(filenames[f],":",predictedClassIndices[f])
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
cats/cat.1546.jpg : 0
cats/cat.1547.jpg : 0
cats/cat.1548.jpg : 0
cats/cat.1549.jpg : 0
cats/cat.1550.jpg : 0
cats/cat.1566.jpg : 0
cats/cat.1593.jpg : 0
cats/cat.1594.jpg : 0
dogs/dog.1514.jpg : 0
dogs/dog.1520.jpg : 0
dogs/dog.1525.jpg : 0
dogs/dog.1551.jpg : 0
dogs/dog.1555.jpg : 0
dogs/dog.1574.jpg : 0
dogs/dog.1594.jpg : 0
dogs/dog.1597.jpg : 0
dogs/dog.1599.jpg : 0
[[1.0473319e-05]
[9.8473930e-01]
[2.9069009e-01]
[5.0639841e-07]
[1.8511847e-01]
[6.0166395e-01]
[4.2568660e-01]
[4.6028453e-01]
[7.8800195e-01]
[8.5675471e-02]
[8.2654454e-02]
[7.2898394e-01]
[1.5504999e-01]
[8.2106847e-01]
[8.7003058e-01]
[9.9999285e-01]
[5.1210046e-01]]
现在,我对测试数据进行预测,如下所示:
import os, shutil
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
# Path to Training Directory
train_dir = 'Dogs_Vs_Cats_Small/train'
# Path to Validation Directory
validation_dir = 'Dogs_Vs_Cats_Small/validation'
#### Create the Convolutional Base
Max_Pool_Size = (2,2)
model = Sequential([
Conv2D(input_shape = (150, 150, 3), filters = 32, kernel_size = (3,3), activation = 'relu',
padding = 'valid', data_format = 'channels_last'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 64, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size)
])
#### Define the Dense Layers on Top of Convolutional Base
model.add(Flatten())
model.add(Dense(units = 512, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))
model.summary()
model.compile(optimizer = RMSprop(learning_rate = 0.001), loss = 'binary_crossentropy', metrics = 'acc')
Train_Gen = ImageDataGenerator(1./255)
Val_Gen = ImageDataGenerator(1./255)
Train_Generator = Train_Gen.flow_from_directory(train_dir, target_size = (150,150), batch_size = 20,
class_mode = 'binary')
Val_Generator = Val_Gen.flow_from_directory(validation_dir, target_size = (150, 150), class_mode = 'binary',
batch_size = 20)
batch_size = 20
target_size = (150,150)
No_Of_Training_Images = Train_Generator.classes.shape[0]
No_Of_Val_Images = Val_Generator.classes.shape[0]
steps_per_epoch = No_Of_Training_Images/batch_size
validation_steps = No_Of_Val_Images/batch_size
history = model.fit(x = Train_Generator, shuffle=True, epochs = 20,
steps_per_epoch = steps_per_epoch,
validation_data = Val_Generator
, validation_steps = validation_steps
)
Test_Dir = 'Dogs_Vs_Cats_Very_Small/test'
Test_Generator = ImageDataGenerator(1./255).flow_from_directory(Test_Dir,
target_size = (150,150), batch_size = 1,
shuffle = False, class_mode = 'binary') # This outputs Found 17 images belonging to 2 classes.
No_Of_Samples = len(Test_Generator.filenames)
testPredictions = model.predict(Test_Generator, steps = No_Of_Samples)
predictedClassIndices=np.argmax(testPredictions,axis=1)
print(predictedClassIndices)
filenames = Test_Generator.filenames
for f in range(len(filenames)):
print(filenames[f],":",predictedClassIndices[f])
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
cats/cat.1546.jpg : 0
cats/cat.1547.jpg : 0
cats/cat.1548.jpg : 0
cats/cat.1549.jpg : 0
cats/cat.1550.jpg : 0
cats/cat.1566.jpg : 0
cats/cat.1593.jpg : 0
cats/cat.1594.jpg : 0
dogs/dog.1514.jpg : 0
dogs/dog.1520.jpg : 0
dogs/dog.1525.jpg : 0
dogs/dog.1551.jpg : 0
dogs/dog.1555.jpg : 0
dogs/dog.1574.jpg : 0
dogs/dog.1594.jpg : 0
dogs/dog.1597.jpg : 0
dogs/dog.1599.jpg : 0
[[1.0473319e-05]
[9.8473930e-01]
[2.9069009e-01]
[5.0639841e-07]
[1.8511847e-01]
[6.0166395e-01]
[4.2568660e-01]
[4.6028453e-01]
[7.8800195e-01]
[8.5675471e-02]
[8.2654454e-02]
[7.2898394e-01]
[1.5504999e-01]
[8.2106847e-01]
[8.7003058e-01]
[9.9999285e-01]
[5.1210046e-01]]
上述Print
语句的输出,即预测类
如下所示:
import os, shutil
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
# Path to Training Directory
train_dir = 'Dogs_Vs_Cats_Small/train'
# Path to Validation Directory
validation_dir = 'Dogs_Vs_Cats_Small/validation'
#### Create the Convolutional Base
Max_Pool_Size = (2,2)
model = Sequential([
Conv2D(input_shape = (150, 150, 3), filters = 32, kernel_size = (3,3), activation = 'relu',
padding = 'valid', data_format = 'channels_last'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 64, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size)
])
#### Define the Dense Layers on Top of Convolutional Base
model.add(Flatten())
model.add(Dense(units = 512, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))
model.summary()
model.compile(optimizer = RMSprop(learning_rate = 0.001), loss = 'binary_crossentropy', metrics = 'acc')
Train_Gen = ImageDataGenerator(1./255)
Val_Gen = ImageDataGenerator(1./255)
Train_Generator = Train_Gen.flow_from_directory(train_dir, target_size = (150,150), batch_size = 20,
class_mode = 'binary')
Val_Generator = Val_Gen.flow_from_directory(validation_dir, target_size = (150, 150), class_mode = 'binary',
batch_size = 20)
batch_size = 20
target_size = (150,150)
No_Of_Training_Images = Train_Generator.classes.shape[0]
No_Of_Val_Images = Val_Generator.classes.shape[0]
steps_per_epoch = No_Of_Training_Images/batch_size
validation_steps = No_Of_Val_Images/batch_size
history = model.fit(x = Train_Generator, shuffle=True, epochs = 20,
steps_per_epoch = steps_per_epoch,
validation_data = Val_Generator
, validation_steps = validation_steps
)
Test_Dir = 'Dogs_Vs_Cats_Very_Small/test'
Test_Generator = ImageDataGenerator(1./255).flow_from_directory(Test_Dir,
target_size = (150,150), batch_size = 1,
shuffle = False, class_mode = 'binary') # This outputs Found 17 images belonging to 2 classes.
No_Of_Samples = len(Test_Generator.filenames)
testPredictions = model.predict(Test_Generator, steps = No_Of_Samples)
predictedClassIndices=np.argmax(testPredictions,axis=1)
print(predictedClassIndices)
filenames = Test_Generator.filenames
for f in range(len(filenames)):
print(filenames[f],":",predictedClassIndices[f])
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
cats/cat.1546.jpg : 0
cats/cat.1547.jpg : 0
cats/cat.1548.jpg : 0
cats/cat.1549.jpg : 0
cats/cat.1550.jpg : 0
cats/cat.1566.jpg : 0
cats/cat.1593.jpg : 0
cats/cat.1594.jpg : 0
dogs/dog.1514.jpg : 0
dogs/dog.1520.jpg : 0
dogs/dog.1525.jpg : 0
dogs/dog.1551.jpg : 0
dogs/dog.1555.jpg : 0
dogs/dog.1574.jpg : 0
dogs/dog.1594.jpg : 0
dogs/dog.1597.jpg : 0
dogs/dog.1599.jpg : 0
[[1.0473319e-05]
[9.8473930e-01]
[2.9069009e-01]
[5.0639841e-07]
[1.8511847e-01]
[6.0166395e-01]
[4.2568660e-01]
[4.6028453e-01]
[7.8800195e-01]
[8.5675471e-02]
[8.2654454e-02]
[7.2898394e-01]
[1.5504999e-01]
[8.2106847e-01]
[8.7003058e-01]
[9.9999285e-01]
[5.1210046e-01]]
如上所述,所有图像都预测为Class=0
,即Cats
我已经研究过了,我的数据是平衡的(1000张猫的图片和1000张狗的图片),所以,根据我的理解,重新平衡我的数据集或调整类权重并不适用。我也尝试过“增加训练时间”
编辑:测试预测的内容如下所示:
import os, shutil
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
# Path to Training Directory
train_dir = 'Dogs_Vs_Cats_Small/train'
# Path to Validation Directory
validation_dir = 'Dogs_Vs_Cats_Small/validation'
#### Create the Convolutional Base
Max_Pool_Size = (2,2)
model = Sequential([
Conv2D(input_shape = (150, 150, 3), filters = 32, kernel_size = (3,3), activation = 'relu',
padding = 'valid', data_format = 'channels_last'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 64, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size),
Conv2D(filters = 128, kernel_size = (3,3), activation = 'relu', padding = 'valid'),
MaxPooling2D(pool_size = Max_Pool_Size)
])
#### Define the Dense Layers on Top of Convolutional Base
model.add(Flatten())
model.add(Dense(units = 512, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))
model.summary()
model.compile(optimizer = RMSprop(learning_rate = 0.001), loss = 'binary_crossentropy', metrics = 'acc')
Train_Gen = ImageDataGenerator(1./255)
Val_Gen = ImageDataGenerator(1./255)
Train_Generator = Train_Gen.flow_from_directory(train_dir, target_size = (150,150), batch_size = 20,
class_mode = 'binary')
Val_Generator = Val_Gen.flow_from_directory(validation_dir, target_size = (150, 150), class_mode = 'binary',
batch_size = 20)
batch_size = 20
target_size = (150,150)
No_Of_Training_Images = Train_Generator.classes.shape[0]
No_Of_Val_Images = Val_Generator.classes.shape[0]
steps_per_epoch = No_Of_Training_Images/batch_size
validation_steps = No_Of_Val_Images/batch_size
history = model.fit(x = Train_Generator, shuffle=True, epochs = 20,
steps_per_epoch = steps_per_epoch,
validation_data = Val_Generator
, validation_steps = validation_steps
)
Test_Dir = 'Dogs_Vs_Cats_Very_Small/test'
Test_Generator = ImageDataGenerator(1./255).flow_from_directory(Test_Dir,
target_size = (150,150), batch_size = 1,
shuffle = False, class_mode = 'binary') # This outputs Found 17 images belonging to 2 classes.
No_Of_Samples = len(Test_Generator.filenames)
testPredictions = model.predict(Test_Generator, steps = No_Of_Samples)
predictedClassIndices=np.argmax(testPredictions,axis=1)
print(predictedClassIndices)
filenames = Test_Generator.filenames
for f in range(len(filenames)):
print(filenames[f],":",predictedClassIndices[f])
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
cats/cat.1546.jpg : 0
cats/cat.1547.jpg : 0
cats/cat.1548.jpg : 0
cats/cat.1549.jpg : 0
cats/cat.1550.jpg : 0
cats/cat.1566.jpg : 0
cats/cat.1593.jpg : 0
cats/cat.1594.jpg : 0
dogs/dog.1514.jpg : 0
dogs/dog.1520.jpg : 0
dogs/dog.1525.jpg : 0
dogs/dog.1551.jpg : 0
dogs/dog.1555.jpg : 0
dogs/dog.1574.jpg : 0
dogs/dog.1594.jpg : 0
dogs/dog.1597.jpg : 0
dogs/dog.1599.jpg : 0
[[1.0473319e-05]
[9.8473930e-01]
[2.9069009e-01]
[5.0639841e-07]
[1.8511847e-01]
[6.0166395e-01]
[4.2568660e-01]
[4.6028453e-01]
[7.8800195e-01]
[8.5675471e-02]
[8.2654454e-02]
[7.2898394e-01]
[1.5504999e-01]
[8.2106847e-01]
[8.7003058e-01]
[9.9999285e-01]
[5.1210046e-01]]
有人能帮我纠正一下吗
提前感谢大家。当您将类分配给testPredictions
结果时,这里的问题在最后一步。argmax
方法“返回沿轴的最大值索引”。在您的情况下,它总是0
,因为沿着axis=1
只有一个元素(索引0
)
由于您正在进行二进制分类,并且类是平衡的,因此应用0.5概率阈值来分配类是最有意义的:
predictedClassIndices = testPredictions > 0.5
for idx, filename in enumerate(filenames):
print(filename,":",predictedClassIndices[idx])
你能为你的例子提供testPredictions
的内容吗?@Lukasz-Tracewski,当然,谢谢你的回答。我已经用testPredictions
的内容更新了我的问题。