如何在使用可初始化迭代器时从tensorflow中的多个TFR记录中检索示例

如何在使用可初始化迭代器时从tensorflow中的多个TFR记录中检索示例,tensorflow,tensorflow-datasets,Tensorflow,Tensorflow Datasets,我有多个名为:Train_DE_01.tfrecords到Train_DE_34.tfrecords;和Devel_DE_01.tfrecords到Devel_DE_14.tfrecords。因此,我有一个培训和验证数据集。我的目标是迭代tfrecords的示例,这样我可以从Train_DE_01.tfrecords检索到两个示例,从Train_DE_02.tfrecords检索到两个示例……直到Train_DE_34.tfrecords。换句话说,当批大小为68时,我需要从每个tfrecord文件中提取两个示例。

我有多个名为:
Train_DE_01.tfrecords
Train_DE_34.tfrecords
;和
Devel_DE_01.tfrecords
Devel_DE_14.tfrecords
。因此,我有一个培训和验证数据集。我的目标是迭代tfrecords的示例,这样我可以从
Train_DE_01.tfrecords
中检索到两个示例,从
Train_DE_02.tfrecords
中检索到两个示例……直到从
Train_DE_34.tfrecords
中检索到两个示例。换句话说,当批大小为68时,我需要从每个
tfrecord
文件中提取两个示例。在我的代码中,我使用了一个可初始化的迭代器,如下所示:

# file_name: a placeholder tensor that will hold the tfrecord file paths.
def load_sewa_data(file_name, batch_size):
    """Parse and batch one tfrecord dataset; return one batch's tensors and the iterator."""
    with tf.name_scope('sewa_tf_records'):
        dataset = (tf.data.TFRecordDataset(file_name)
                   .map(_parse_sewa_example)
                   .batch(batch_size))
        iterator = dataset.make_initializable_iterator(shared_name='sewa_iterator')

        # Unpack the 7 parsed fields of the next batch directly.
        (names, detected, arousal, valence,
         liking, istalkings, images) = iterator.get_next()

        print(names, detected, arousal, valence, liking, istalkings, images)

        return names, detected, arousal, valence, liking, istalkings, images, iterator
def load_devel_sewa_tfrecords(filenames_dev, test_batch_size):
    """Build one parsed+batched dataset per devel tfrecord file and zip them.

    Zipping makes each step yield one batch of size `test_batch_size` from
    every file at once.
    """
    with tf.name_scope('TFRecordsDevel'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_devel_function).batch(test_batch_size)
            for fn in filenames_dev
        ]
        return tf.data.Dataset.zip(tuple(per_file_datasets))


def load_train_sewa_tfrecords(filenames_train, train_batch_size):
    """Build one parsed+batched dataset per training tfrecord file and zip them.

    Zipping makes each step yield one batch of size `train_batch_size` from
    every file at once.
    """
    with tf.name_scope('TFRecordsTrain'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_train_function).batch(train_batch_size)
            for fn in filenames_train
        ]
        return tf.data.Dataset.zip(tuple(per_file_datasets))


def load_sewa_dataset(filenames_train, train_batch_size, filenames_dev, test_batch_size):
    """Create a reinitializable iterator shared by train/devel zipped datasets.

    Returns the seven per-field tensors (each the concatenation of one batch
    from every tfrecord file) plus the two initializer ops that switch the
    iterator between the training and validation datasets.
    """
    dataset_train_all = load_train_sewa_tfrecords(filenames_train, train_batch_size)
    dataset_dev_all = load_devel_sewa_tfrecords(filenames_dev, test_batch_size)

    iterator = tf.data.Iterator.from_structure(dataset_train_all.output_types,
                                               dataset_train_all.output_shapes)

    training_init_op = iterator.make_initializer(dataset_train_all)
    validation_init_op = iterator.make_initializer(dataset_dev_all)

    with tf.name_scope('inputs'):
        # next_batch is a tuple with one 7-tuple per tfrecord file
        # (name, detected, arousal, valence, liking, istalking, images).
        next_batch = iterator.get_next(name='next_batch')

        # Transpose file-major tuples into field-major groups, then
        # concatenate each field's per-file batches along the batch axis.
        field_names = ('names', 'detected', 'arousal', 'valence',
                       'liking', 'istalkings', 'images')
        names, detected, arousal, valence, liking, istalkings, images = (
            tf.concat(list(parts), axis=0, name=label)
            for parts, label in zip(zip(*next_batch), field_names)
        )

        return names, detected, arousal, valence, liking, istalkings, images, training_init_op, validation_init_op
def load_train_sewa_tfrecords(filenames_train, train_batch_size):
    """Zip per-file training datasets, attach an initializable iterator, and
    concatenate one batch from every file into single per-field tensors.
    """
    with tf.name_scope('TFRecordsTrain'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_train_function).batch(train_batch_size)
            for fn in filenames_train
        ]
        dataset_train_all = tf.data.Dataset.zip(tuple(per_file_datasets))
        iterator_train_all = dataset_train_all.make_initializable_iterator()

    with tf.name_scope('inputs_train'):
        # next_batch is a tuple with one 7-tuple per tfrecord file
        # (name, detected, arousal, valence, liking, istalking, images).
        next_batch = iterator_train_all.get_next(name='next_batch')

        # Transpose file-major tuples into field-major groups, then
        # concatenate each field's per-file batches along the batch axis.
        field_names = ('names', 'detected', 'arousal', 'valence',
                       'liking', 'istalkings', 'images')
        names, detected, arousal, valence, liking, istalkings, images = (
            tf.concat(list(parts), axis=0, name=label)
            for parts, label in zip(zip(*next_batch), field_names)
        )

        return names, detected, arousal, valence, liking, istalkings, images, iterator_train_all
使用sess.run()在会话中运行名称之后;我发现前68个示例是从
Train_DE_01.tfrecords
获取的;然后,从同一个tfrecord中提取后续示例,直到使用
Train_DE_01.tfrecords
中的所有示例

我已尝试将Dataset api的zip()函数与可重新初始化的迭代器一起使用,如下所示:

# file_name: a placeholder tensor that will hold the tfrecord file paths.
def load_sewa_data(file_name, batch_size):
    """Parse and batch one tfrecord dataset; return one batch's tensors and the iterator."""
    with tf.name_scope('sewa_tf_records'):
        dataset = (tf.data.TFRecordDataset(file_name)
                   .map(_parse_sewa_example)
                   .batch(batch_size))
        iterator = dataset.make_initializable_iterator(shared_name='sewa_iterator')

        # Unpack the 7 parsed fields of the next batch directly.
        (names, detected, arousal, valence,
         liking, istalkings, images) = iterator.get_next()

        print(names, detected, arousal, valence, liking, istalkings, images)

        return names, detected, arousal, valence, liking, istalkings, images, iterator
def load_devel_sewa_tfrecords(filenames_dev, test_batch_size):
    """Build one parsed+batched dataset per devel tfrecord file and zip them.

    Zipping makes each step yield one batch of size `test_batch_size` from
    every file at once.
    """
    with tf.name_scope('TFRecordsDevel'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_devel_function).batch(test_batch_size)
            for fn in filenames_dev
        ]
        return tf.data.Dataset.zip(tuple(per_file_datasets))


def load_train_sewa_tfrecords(filenames_train, train_batch_size):
    """Build one parsed+batched dataset per training tfrecord file and zip them.

    Zipping makes each step yield one batch of size `train_batch_size` from
    every file at once.
    """
    with tf.name_scope('TFRecordsTrain'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_train_function).batch(train_batch_size)
            for fn in filenames_train
        ]
        return tf.data.Dataset.zip(tuple(per_file_datasets))


def load_sewa_dataset(filenames_train, train_batch_size, filenames_dev, test_batch_size):
    """Create a reinitializable iterator shared by train/devel zipped datasets.

    Returns the seven per-field tensors (each the concatenation of one batch
    from every tfrecord file) plus the two initializer ops that switch the
    iterator between the training and validation datasets.
    """
    dataset_train_all = load_train_sewa_tfrecords(filenames_train, train_batch_size)
    dataset_dev_all = load_devel_sewa_tfrecords(filenames_dev, test_batch_size)

    iterator = tf.data.Iterator.from_structure(dataset_train_all.output_types,
                                               dataset_train_all.output_shapes)

    training_init_op = iterator.make_initializer(dataset_train_all)
    validation_init_op = iterator.make_initializer(dataset_dev_all)

    with tf.name_scope('inputs'):
        # next_batch is a tuple with one 7-tuple per tfrecord file
        # (name, detected, arousal, valence, liking, istalking, images).
        next_batch = iterator.get_next(name='next_batch')

        # Transpose file-major tuples into field-major groups, then
        # concatenate each field's per-file batches along the batch axis.
        field_names = ('names', 'detected', 'arousal', 'valence',
                       'liking', 'istalkings', 'images')
        names, detected, arousal, valence, liking, istalkings, images = (
            tf.concat(list(parts), axis=0, name=label)
            for parts, label in zip(zip(*next_batch), field_names)
        )

        return names, detected, arousal, valence, liking, istalkings, images, training_init_op, validation_init_op
def load_train_sewa_tfrecords(filenames_train, train_batch_size):
    """Zip per-file training datasets, attach an initializable iterator, and
    concatenate one batch from every file into single per-field tensors.
    """
    with tf.name_scope('TFRecordsTrain'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_train_function).batch(train_batch_size)
            for fn in filenames_train
        ]
        dataset_train_all = tf.data.Dataset.zip(tuple(per_file_datasets))
        iterator_train_all = dataset_train_all.make_initializable_iterator()

    with tf.name_scope('inputs_train'):
        # next_batch is a tuple with one 7-tuple per tfrecord file
        # (name, detected, arousal, valence, liking, istalking, images).
        next_batch = iterator_train_all.get_next(name='next_batch')

        # Transpose file-major tuples into field-major groups, then
        # concatenate each field's per-file batches along the batch axis.
        field_names = ('names', 'detected', 'arousal', 'valence',
                       'liking', 'istalkings', 'images')
        names, detected, arousal, valence, liking, istalkings, images = (
            tf.concat(list(parts), axis=0, name=label)
            for parts, label in zip(zip(*next_batch), field_names)
        )

        return names, detected, arousal, valence, liking, istalkings, images, iterator_train_all
现在,如果我尝试以下方法:

sess = tf.Session()
sess.run(training_init_op)
print(sess.run(names))
我得到了以下错误:

ValueError: The two structures don't have the same number of elements.
这是有意义的,因为训练文件的数量是34,而验证数据集的数量是14

我想知道我怎样才能实现心中的目标


非常感谢您的帮助

以下是我使用
tf.cond
找到的解决方法

为了从每个
tfrecord
中检索2个示例;我使用了
tf.Dataset.data
api的
zip
方法,如下所示:

# file_name: a placeholder tensor that will hold the tfrecord file paths.
def load_sewa_data(file_name, batch_size):
    """Parse and batch one tfrecord dataset; return one batch's tensors and the iterator."""
    with tf.name_scope('sewa_tf_records'):
        dataset = (tf.data.TFRecordDataset(file_name)
                   .map(_parse_sewa_example)
                   .batch(batch_size))
        iterator = dataset.make_initializable_iterator(shared_name='sewa_iterator')

        # Unpack the 7 parsed fields of the next batch directly.
        (names, detected, arousal, valence,
         liking, istalkings, images) = iterator.get_next()

        print(names, detected, arousal, valence, liking, istalkings, images)

        return names, detected, arousal, valence, liking, istalkings, images, iterator
def load_devel_sewa_tfrecords(filenames_dev, test_batch_size):
    """Build one parsed+batched dataset per devel tfrecord file and zip them.

    Zipping makes each step yield one batch of size `test_batch_size` from
    every file at once.
    """
    with tf.name_scope('TFRecordsDevel'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_devel_function).batch(test_batch_size)
            for fn in filenames_dev
        ]
        return tf.data.Dataset.zip(tuple(per_file_datasets))


def load_train_sewa_tfrecords(filenames_train, train_batch_size):
    """Build one parsed+batched dataset per training tfrecord file and zip them.

    Zipping makes each step yield one batch of size `train_batch_size` from
    every file at once.
    """
    with tf.name_scope('TFRecordsTrain'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_train_function).batch(train_batch_size)
            for fn in filenames_train
        ]
        return tf.data.Dataset.zip(tuple(per_file_datasets))


def load_sewa_dataset(filenames_train, train_batch_size, filenames_dev, test_batch_size):
    """Create a reinitializable iterator shared by train/devel zipped datasets.

    Returns the seven per-field tensors (each the concatenation of one batch
    from every tfrecord file) plus the two initializer ops that switch the
    iterator between the training and validation datasets.
    """
    dataset_train_all = load_train_sewa_tfrecords(filenames_train, train_batch_size)
    dataset_dev_all = load_devel_sewa_tfrecords(filenames_dev, test_batch_size)

    iterator = tf.data.Iterator.from_structure(dataset_train_all.output_types,
                                               dataset_train_all.output_shapes)

    training_init_op = iterator.make_initializer(dataset_train_all)
    validation_init_op = iterator.make_initializer(dataset_dev_all)

    with tf.name_scope('inputs'):
        # next_batch is a tuple with one 7-tuple per tfrecord file
        # (name, detected, arousal, valence, liking, istalking, images).
        next_batch = iterator.get_next(name='next_batch')

        # Transpose file-major tuples into field-major groups, then
        # concatenate each field's per-file batches along the batch axis.
        field_names = ('names', 'detected', 'arousal', 'valence',
                       'liking', 'istalkings', 'images')
        names, detected, arousal, valence, liking, istalkings, images = (
            tf.concat(list(parts), axis=0, name=label)
            for parts, label in zip(zip(*next_batch), field_names)
        )

        return names, detected, arousal, valence, liking, istalkings, images, training_init_op, validation_init_op
def load_train_sewa_tfrecords(filenames_train, train_batch_size):
    """Zip per-file training datasets, attach an initializable iterator, and
    concatenate one batch from every file into single per-field tensors.
    """
    with tf.name_scope('TFRecordsTrain'):
        per_file_datasets = [
            tf.data.TFRecordDataset(fn).map(_parse_train_function).batch(train_batch_size)
            for fn in filenames_train
        ]
        dataset_train_all = tf.data.Dataset.zip(tuple(per_file_datasets))
        iterator_train_all = dataset_train_all.make_initializable_iterator()

    with tf.name_scope('inputs_train'):
        # next_batch is a tuple with one 7-tuple per tfrecord file
        # (name, detected, arousal, valence, liking, istalking, images).
        next_batch = iterator_train_all.get_next(name='next_batch')

        # Transpose file-major tuples into field-major groups, then
        # concatenate each field's per-file batches along the batch axis.
        field_names = ('names', 'detected', 'arousal', 'valence',
                       'liking', 'istalkings', 'images')
        names, detected, arousal, valence, liking, istalkings, images = (
            tf.concat(list(parts), axis=0, name=label)
            for parts, label in zip(zip(*next_batch), field_names)
        )

        return names, detected, arousal, valence, liking, istalkings, images, iterator_train_all
我将有一个类似的开发方法;或者我可以将传递参数更改为该方法,以便我可以使用相同的方法两次。。。(不是问题)

然后:

请注意,必须在
sess.run([names…])
之前运行两个初始化:
sess.run(iterator_train_all.initializer)
sess.run(iterator_dev_all.initializer)
,因为我想使用
tf.cond
;将检索培训和验证示例,但
tf.cond
将根据
phase_train
place_holder只返回其中一个示例,这将确定我们是处于培训模式还是测试模式

证明:当我插入
names = tf.Print(input_=[names], data=[names], message='dev names')
load_devel_sewa_tfrecords
下时;返回前;我得到:

dev names[\'Devel_01\' \'Devel_01\' \'Devel_02\'...]
在控制台中打印出来。即,在评估培训数据集时;tensorflow同时评估了devel数据集;但是
tf.cond
输出了与训练数据集相关的tf记录

希望这个答案有帮助

也许你可以使用这个方法来完成这个特定的任务?例如,将训练集的循环长度设置为34,将块长度设置为2,应该足以循环浏览每条记录,然后从每条记录中生成2个示例?