Python ';TypeError:应为字节,找到str';使用数据集API创建图像数据集时

Python ';TypeError:应为字节,找到str';使用数据集API创建图像数据集时,python,tensorflow,tensorflow-datasets,Python,Tensorflow,Tensorflow Datasets,我想使用dataset API从图像中创建TensorFlow的数据集。这些图像被组织在一个复杂的层次结构中,但最后总是有两个目录“False”和“true”。我写了这段代码 import tensorflow as tf from tensorflow.data import Dataset import os def enumerate_all_files(rootdir): for subdir, dir, files in os.walk(rootdir): f

我想使用dataset API从图像中创建TensorFlow的数据集。这些图像被组织在一个复杂的层次结构中,但最后总是有两个目录“False”和“true”。我写了这段代码

import tensorflow as tf
from tensorflow.data import Dataset
import os

def enumerate_all_files(rootdir):
    for subdir, dir, files in os.walk(rootdir):
        for file in files:
            # return path to the file and its label
            # label is simply a 1 or 0 depending on whether an image is in the "Genuine" folder or not
            yield os.path.join(subdir, file), int(subdir.split(os.path.sep)[-1] == "Genuine")

def input_parser(img_path, label):
    # convert the label to one-hot encoding
    one_hot = tf.one_hot(label, 2)
    # read the img from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_png(img_file, channels=3)
    return img_decoded, one_hot

def get_dataset():
    generator = lambda: enumerate_all_files("/tmp/images/training/")
    dataset = Dataset.from_generator(generator, (tf.string, tf.int32)).shuffle(1000).batch(100)
    dataset = dataset.map(input_parser)
    return dataset
但是,当我在终端中使用

tf.enable_eager_execution()
# all the code above
d = get_dataset()
for f in d.make_one_shot_iterator():
    print(f)
它因一个错误而崩溃

W tensorflow/core/framework/op_kernel.cc:1306] Unknown: SystemError: <weakref at 0x7ff8232f0620; to 'function' at 0x7ff8233c9048 (generator_py_func)> returned a result with an error set
TypeError: expected bytes, str found  

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "lcnn.py", line 29, in <module>
    for f in d.make_one_shot_iterator():
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 487, in __next__
    return self.next()
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 518, in next
    return self._next_internal()
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 508, in _next_internal
    output_shapes=self._flat_output_shapes)
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1848, in iterator_get_next_sync
    "output_types", output_types, "output_shapes", output_shapes)
SystemError: <built-in function TFE_Py_FastPathExecute> returned a result with an error set

这很好,尽管我不确定错误消息,但我尝试了您的代码,如果我在批处理和洗牌操作之前进行输入解析器映射,它对我很有效:

def get_dataset():
    generator = lambda: enumerate_all_files("/tmp/images/training/")
    dataset = Dataset.from_generator(generator, (tf.string,tf.int32)).map(input_parser)
    dataset = dataset.shuffle(1000).batch(100)
    return dataset

我最终将代码从
数据集。从\u生成器
更改为
数据集。从\u张量\u切片
。最终代码:

import tensorflow as tf
from tensorflow.data import Dataset
import os
tf.enable_eager_execution()

def enumerate_all_files(rootdir):
    for subdir, dir, files in os.walk(rootdir):
        for file in files:
            # return path to the file and its label
            # label is simply a 1 or 0 depending on whether an image is in the "Genuine" folder or not
            yield os.path.join(subdir, file), int(subdir.split(os.path.sep)[-1] == "Genuine")

def input_parser(img_path, label):
    # convert the label to one-hot encoding
    one_hot = tf.one_hot(label, 2)
    # read the img from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_png(img_file, channels=3)
    return img_decoded, one_hot

def get_dataset():

    file_paths = []
    labels = []

    for i in enumerate_all_files("/media/kuba/Seagate Expansion Drive/MGR/Spektrogramy/FFT/training/"):
        file_paths.append(i[0])
        labels.append(i[1])
    dataset = Dataset.from_tensor_slices((file_paths, labels)).map(input_parser).shuffle(1000).batch(100)
    return dataset

d = get_dataset()
for f in d.make_one_shot_iterator():
    print(type(f))

我仍然得到一个错误,我检查了发电机,它按预期工作。这很奇怪,它对我有效。我的所有图像都在同一个目录中,因此生成器始终返回“False”。这里是Tensorflow 1.9.0…哦,我将返回的标签更改为0或1,而不是False或True。我还有Tensorflow 1.9仍然适用于我。你能确认你的图像没有问题吗?您能否为目录中的所有图像运行
tf.read_file
tf.decode_png
?即使未调用
map
batch
shuffle
import tensorflow as tf
from tensorflow.data import Dataset
import os
tf.enable_eager_execution()

def enumerate_all_files(rootdir):
    for subdir, dir, files in os.walk(rootdir):
        for file in files:
            # return path to the file and its label
            # label is simply a 1 or 0 depending on whether an image is in the "Genuine" folder or not
            yield os.path.join(subdir, file), int(subdir.split(os.path.sep)[-1] == "Genuine")

def input_parser(img_path, label):
    # convert the label to one-hot encoding
    one_hot = tf.one_hot(label, 2)
    # read the img from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_png(img_file, channels=3)
    return img_decoded, one_hot

def get_dataset():

    file_paths = []
    labels = []

    for i in enumerate_all_files("/media/kuba/Seagate Expansion Drive/MGR/Spektrogramy/FFT/training/"):
        file_paths.append(i[0])
        labels.append(i[1])
    dataset = Dataset.from_tensor_slices((file_paths, labels)).map(input_parser).shuffle(1000).batch(100)
    return dataset

d = get_dataset()
for f in d.make_one_shot_iterator():
    print(type(f))