Python 如何加载文件夹内的文件夹(如 train 和 test 文件夹中的每个子文件夹),并避免内存错误?
文件夹(train)> 10 个子文件夹(即玫瑰、向日葵、百合、莲花、雏菊、郁金香、兰花、鸢尾、薰衣草、水仙花)> 每个子文件夹都包含大小为 500 x 500 的 jpg 图像;test 文件夹的结构相同。每个类别的图像总数为 900,因此训练集共有 9000 张图像,测试集也有 9000 张。我想做 k-fold=5 的交叉验证,但不知道如何在代码中加载数据以避免内存错误。
import os

import cv2
import numpy as np
from sklearn.model_selection import KFold
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
# Model configuration
batch_size = 50
size = 500, 500          # target (width, height) passed to cv2.resize
num_channels = 3
loss_function = sparse_categorical_crossentropy
no_classes = 10
no_epochs = 25
optimizer = Adam()
verbosity = 1
num_folds = 5

# Dataset roots: each directory contains one sub-folder per flower class.
train = "C:/Users/trainb/"
test = "C:/Users/testb/"

# List the class sub-folders once, from the train directory.
# BUG FIX: the original listed the train directory and then immediately
# overwrote the result with the test listing, so the train listing was lost.
# Both trees have the same class folders, so one (sorted, for a deterministic
# class -> index mapping) listing is sufficient.
folders = sorted(os.listdir(train))
print(folders)

# Accumulators filled by the loading loops below.
image_names = []
label_train = []
data_train = []
label_test = []
data_test = []
# Load the training images.
# BUG FIX: the original appended the image *file path* to label_train, which
# cannot serve as a class target.  sparse_categorical_crossentropy expects an
# integer class index (0 .. no_classes-1), so we use the position of the class
# folder in the sorted `folders` list.
for class_index, folder in enumerate(folders):
    for file in os.listdir(os.path.join(train, folder)):
        if not file.endswith("jpg"):
            continue
        img = cv2.imread(os.path.join(train, folder, file))
        data_train.append(cv2.resize(img, size))
        label_train.append(class_index)
# Load the test images, mirroring the training loop.
# BUG FIX: as with the training data, the label must be the integer class
# index, not the file path the original code appended.
for class_index, folder in enumerate(folders):
    for file in os.listdir(os.path.join(test, folder)):
        if not file.endswith("jpg"):
            continue
        img = cv2.imread(os.path.join(test, folder, file))
        data_test.append(cv2.resize(img, size))
        label_test.append(class_index)
# Determine the shape of one input sample: (height, width, channels).
# BUG FIX: the original built (size, num_channels) == ((500, 500), 3), a
# nested tuple that Conv2D's input_shape argument rejects; flatten it to
# (500, 500, 3).
input_shape = size + (num_channels,)

# Convert the Python lists to NumPy arrays (images -> 4-D tensor,
# labels -> 1-D integer vector).
input_train = np.array(data_train)
target_train = np.array(label_train)
input_test = np.array(data_test)
target_test = np.array(label_test)

# Parse pixel values as floats and normalize to [0, 1].
# NOTE(review): this materializes the whole dataset in RAM as float32; for
# very large datasets prefer streaming batches from disk instead.
input_train = input_train.astype('float32')
input_test = input_test.astype('float32')
input_train = input_train / 255
input_test = input_test / 255
# Containers that collect one accuracy / loss entry per fold.
acc_per_fold, loss_per_fold = [], []

# Pool the train and test splits so the K-fold validator can re-partition
# the full dataset on every fold.
inputs = np.concatenate([input_train, input_test], axis=0)
targets = np.concatenate([target_train, target_test], axis=0)

# Shuffling K-fold cross validator.
kfold = KFold(n_splits=num_folds, shuffle=True)
# K-fold cross-validation: build, train, and evaluate a fresh model per fold.
fold_no = 1
# BUG FIX: the loop variables are renamed train_idx/test_idx so they no
# longer shadow the `train` / `test` directory-path variables defined above.
for train_idx, test_idx in kfold.split(inputs, targets):
    # Define the model architecture.
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(no_classes, activation='softmax'))

    # Compile the model.
    # BUG FIX: a *fresh* Adam instance per fold; the original reused one
    # optimizer object across folds, carrying its internal state (moment
    # estimates, step count) from one fold into the next.
    model.compile(loss=loss_function,
                  optimizer=Adam(),
                  metrics=['accuracy'])

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model.
    history = model.fit(inputs[train_idx], targets[train_idx],
                        batch_size=batch_size,
                        epochs=no_epochs,
                        verbose=verbosity)

    # Generalization metrics on the held-out fold.
    scores = model.evaluate(inputs[test_idx], targets[test_idx], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    # Increase fold number.
    fold_no = fold_no + 1
# == Provide average scores ==
# Report the per-fold metrics followed by their mean (and std for accuracy).
print('------------------------------------------------------------------------')
print('Score per fold')
for fold, (fold_loss, fold_acc) in enumerate(zip(loss_per_fold, acc_per_fold), start=1):
    print('------------------------------------------------------------------------')
    print(f'> Fold {fold} - Loss: {fold_loss} - Accuracy: {fold_acc}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')
(回答)我也遇到过同样的问题,是通过分批(按 batch)加载数据集来解决的,希望相关链接(原文链接已丢失)能帮到您。补充说明:我的数据集在 train 文件夹中每个类别文件夹的名称是唯一的,但每个文件夹内的图像编号都从 0 开始、到 899 结束。