Python Keras 3D Convolution: Error when checking model input: expected convolution3d_input_1 to have 5 dimensions, but got array with shape (1, 90, 100, 100)


The goal of the system is to classify video input of words being pronounced. Each sample is a set of 90 100x100 grayscale (single color channel) frames with dimensions (1, 90, 100, 100). Previously, the training data was loaded directly into memory for training; this worked, but it was inefficient, and it would have made using more training samples later impossible. To solve this, the system was modified to preprocess the training data and save it to an HDF5 file, which is then loaded on demand by a generator during training. However, as a result of this modification, the following error is now generated:

Exception: Error when checking model input: expected convolution3d_input_1 to have 5 dimensions, but got array with shape (1, 90, 100, 100)
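
For context, Keras prepends a batch axis to the per-sample input_shape, so a model built with input_shape=(1, 90, 100, 100) checks incoming arrays for 5 dimensions. A minimal sketch to confirm this (osr.osr is the compiled model from the code below; input_shape is the standard Keras model attribute):

print osr.osr.input_shape  # (None, 1, 90, 100, 100) -- the leading None is the batch axis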

Here is the code for the system:

from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from sklearn.utils import shuffle
K.set_image_dim_ordering("th")

import cv2
import h5py
import json
import os
import sys
import numpy as np

class OpticalSpeechRecognizer(object):
    def __init__(self, rows, columns, frames_per_sequence):
        self.rows = rows
        self.columns = columns
        self.frames_per_sequence = frames_per_sequence
        self.osr = None

    def train_osr_model(self, training_save_fn):
        """ Train the optical speech recognizer
        """
        print "\nTraining OSR"
        validation_ratio = 0.3
        training_sequence_generator = self.generate_training_sequences(training_save_fn)
        validation_sequence_generator = self.generate_training_sequences(training_save_fn, validation_ratio=validation_ratio)
        training_save_file = h5py.File(training_save_fn, "r")
        sample_count = training_save_file.attrs["sample_count"]
        pbi = PrintBatchInfo()
        self.osr.fit_generator(generator=training_sequence_generator,
                               validation_data=validation_sequence_generator,
                               samples_per_epoch=sample_count,
                               nb_val_samples=int(round(validation_ratio*sample_count)),
                               nb_epoch=10,
                               verbose=2,
                               callbacks=[pbi],
                               class_weight=None,
                               nb_worker=1)

    def generate_osr_model(self, training_save_fn):
        """ Builds the optical speech recognizer model
        """
        print "".join(["Generating OSR model\n",
                       "-"*40])
        training_save_file = h5py.File(training_save_fn, "r")
        osr = Sequential()
        print " - Adding convolution layers"
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Flatten())
        osr.add(Dropout(0.2))
        print " - Adding fully connected layers"
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=64,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=32,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=len(training_save_file.attrs["training_classes"].split(",")),
                      init="normal",
                      activation="softmax"))
        print " - Compiling model"
        sgd = SGD(lr=0.01,
                  decay=1e-6,
                  momentum=0.9,
                  nesterov=True)
        osr.compile(loss="categorical_crossentropy",
                    optimizer=sgd,
                    metrics=["accuracy"])
        self.osr = osr
        print " * OSR MODEL GENERATED * "

    def generate_training_sequences(self, training_save_fn, validation_ratio=0):
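        """ Yields training or validation samples on demand from the HDF5 save file
        """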
        while True:
            training_save_file = h5py.File(training_save_fn, "r")
            sample_count = int(training_save_file.attrs["sample_count"])
            # generate sequences for validation
            if validation_ratio:
                validation_sample_count = int(round(validation_ratio*sample_count))
                validation_sample_idxs = np.random.randint(low=0, high=sample_count, size=validation_sample_count)
                for idx in validation_sample_idxs:
                    X = training_save_file["X"][idx]
                    Y = training_save_file["Y"][idx]
                    yield (X, Y)
            # generate sequences for training
            else:
                for idx in range(0, sample_count):
                    X = training_save_file["X"][idx]
                    Y = training_save_file["Y"][idx]
                    yield (X, Y)

    def process_training_data(self, config_file, training_save_fn):
        """ Preprocesses training data and saves them into an HDF5 file
        """
        # load training metadata from config file
        training_metadata = {}
        training_classes = []
        with open(config_file) as training_config:
            training_metadata = json.load(training_config)
            training_classes = sorted(list(training_metadata.keys()))

            print "".join(["\n",
                           "Found {0} training classes!\n".format(len(training_classes)),
                           "-"*40])
            for class_label, training_class in enumerate(training_classes):
                print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
            print ""

        # count number of samples
        sample_count = 0
        sample_count_by_class = [0]*len(training_classes)
        for class_label, training_class in enumerate(training_classes):
            # get training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # update sample count
            sample_count += len(training_class_sequence_paths)
            sample_count_by_class[class_label] = len(training_class_sequence_paths)

        print "".join(["\n",
                       "Found {0} training samples!\n".format(sample_count),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label])
        print ""

        # initialize HDF5 save file, but clear older duplicate first if it exists
        try:
            print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(training_save_fn)
            os.remove(training_save_fn)
        except OSError:
            pass
        training_save_file = h5py.File(training_save_fn, "w")
        training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
        training_save_file.attrs["sample_count"] = sample_count
        x_training_dataset = training_save_file.create_dataset("X", 
                                                              shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
                                                              dtype="f")
        y_training_dataset = training_save_file.create_dataset("Y",
                                                               shape=(sample_count, len(training_classes)),
                                                               dtype="i")

        # iterate through each class data
        sample_idx = 0
        for class_label, training_class in enumerate(training_classes):
            # get training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # iterate through each sequence
            for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
                sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
                                 .format(training_class, idx+1, len(training_class_sequence_paths)))
                sys.stdout.flush()

                # append grayscale, normalized sample frames
                frames = self.process_frames(training_class_sequence_path)
                x_training_dataset[sample_idx] = [frames]

                # append one-hot encoded sample label
                label = [0]*len(training_classes)
                label[class_label] = 1
                y_training_dataset[sample_idx] = label

                # update sample index
                sample_idx += 1

            print "\n"

        training_save_file.close()

        print "Training data processed and saved to {0}".format(training_save_fn)

    def process_frames(self, video_file_path):
        """ Splits frames, resizes frames, converts RGB frames to greyscale, and normalizes frames
        """
        video = cv2.VideoCapture(video_file_path)
        success, frame = video.read()

        frames = []
        success = True

        # resize, convert to grayscale, normalize, and collect valid frames 
        while success:
          success, frame = video.read()
          if success:
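            # note: cv2.resize takes (width, height); rows and columns are swapped here, harmless since both are 100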
            frame = cv2.resize(frame, (self.rows, self.columns))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame = frame.astype('float32') / 255.0
            frames.append(frame)

        # pre-pad short sequences and equalize frame lengths
        if len(frames) < self.frames_per_sequence:
            frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
        frames = frames[0:self.frames_per_sequence]

        return frames

class PrintBatchInfo(Callback):
    def on_batch_end(self, epoch, logs={}):
        print logs

if __name__ == "__main__":
    osr = OpticalSpeechRecognizer(100, 100, 90)
    osr.process_training_data("training_config.json", "training_data.h5")
    osr.generate_osr_model("training_data.h5")
    osr.train_osr_model("training_data.h5")
The problem is in the generator: fit_generator expects it to yield batches, i.e. 5-dimensional arrays of shape (batch_size, 1, 90, 100, 100), but it yields individual 4-dimensional samples of shape (1, 90, 100, 100). Indexing the HDF5 dataset with a list instead of a scalar keeps the leading axis, so each yield becomes a batch of one:

X = training_save_file["X"][[idx]]

(The Y lookup presumably needs the same treatment so that the labels have shape (1, number_of_classes).)
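
To see why the list index fixes the dimensionality, here is a minimal sketch using a NumPy array as a stand-in for the h5py dataset (h5py fancy indexing with a list of indices behaves the same way):

import numpy as np

X = np.zeros((50, 1, 90, 100, 100))  # stand-in for the "X" dataset
print X[0].shape    # (1, 90, 100, 100)    -- 4 dimensions, triggers the error
print X[[0]].shape  # (1, 1, 90, 100, 100) -- 5 dimensions, a batch of one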