Python 如何读取（解码）.tfrecords文件，查看其中的图像并进行增强？_Python_Tensorflow_Decode_Tfrecord_Data Augmentation

Python 如何读取（解码）.tfrecords文件，查看其中的图像并进行增强？

python tensorflow

Python 如何读取（解码）.tfrecords文件，查看其中的图像并进行增强？,python,tensorflow,decode,tfrecord,data-augmentation,Python,Tensorflow,Decode,Tfrecord,Data Augmentation,我有一个.tfrecords文件，我想提取、查看文件中的图像并增强它们。我正在使用 TensorFlow版本：2.3.0 对于下面的代码 raw_dataset = tf.data.TFRecordDataset("*path.tfrecords") for raw_record in raw_dataset.take(1): example = tf.train.Example() example.ParseFromString(raw_record.n

我有一个

.tfrecords

文件，我想提取、查看文件中的图像并增强它们。我正在使用 TensorFlow版本：2.3.0

对于下面的代码

# Open the .tfrecords file as a dataset of serialized records.
raw_dataset = tf.data.TFRecordDataset("*path.tfrecords")

# Decode only the first record as a tf.train.Example and print it.
for raw_record in raw_dataset.take(1):
    example = tf.train.Example.FromString(raw_record.numpy())
    print(example)

我面临以下输出：

features {
  feature {
    key: "depth"
    value {
      int64_list {
        value: 3
      }
    }
  }
  feature {
    key: "height"
    value {
      int64_list {
        value: 333
      }
    }
  }
  feature {
    key: "image_raw"
    value {
      bytes_list {
        value:
      }
    }
  }
  feature {
    key: "label"
    value {
      int64_list {
        value: 16
      }
    }
  }
  feature {
    key: "width"
    value {
      int64_list {
        value: 500
      }
    }
  }
}

下面是一个简单的代码，可以将.tfrecord图像提取为.png格式

要运行下一个代码，您需要通过

pip install tensorflow tensorflow_addons numpy matplotlib

一次性安装所需的pip模块

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf, PIL.Image, numpy as np

# Source dataset: every record is parsed below as a serialized tf.train.Example.
raw_dataset = tf.data.TFRecordDataset('max_32_set.tfrecords')

# Export the first three records as numbered .png files.
for i, raw_record in enumerate(raw_dataset.take(3)):
    example = tf.train.Example.FromString(raw_record.numpy())
    # Snapshot the feature map so fields can be looked up by name.
    feats = dict(example.features.feature.items())
    pixels = np.frombuffer(feats['image_raw'].bytes_list.value[0], dtype=np.uint8)
    dims = [feats[name].int64_list.value[0] for name in ('height', 'width', 'depth')]
    img_arr = pixels.reshape(dims)
    # You can use img_arr numpy array above to directly augment/preprocess
    # your image without saving it to .png.
    img = PIL.Image.fromarray(img_arr)
    img.save(f'max_32_set.tfrecords.{str(i).zfill(5)}.png')

数据集中的第一个图像：

下面的代码绘制每个标签对应的图像数量。

max_32_set.tfrecords

文件中的标签表示为整数（而不是字符串名称），标签名称可能位于单独的小文件中，其中包含有关数据集的元信息

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf, numpy as np, matplotlib.pyplot as plt

raw_dataset = tf.data.TFRecordDataset('max_32_set.tfrecords')

# Tally how many records carry each integer label.
labels_cnts = {}
for raw_record in raw_dataset.as_numpy_iterator():
    example = tf.train.Example.FromString(raw_record)
    feats = dict(example.features.feature.items())
    label = feats['label'].int64_list.value[0]
    labels_cnts[label] = labels_cnts.get(label, 0) + 1

# Plot the counts ordered by label value (labels are unique dict keys, so
# sorting the (label, count) pairs orders them by label).
x, y = zip(*sorted(labels_cnts.items()))
plt.plot(x, y)
plt.xlabel('label')
plt.ylabel('num images')
plt.xticks(x)
plt.show()

max_32_set.tfrecords的绘图

：

下一个代码使用高斯噪声和高斯模糊进行增强，增强的tfrecord数据集保存到

max_32_set.augmented.tfrecords

文件：

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf, tensorflow_addons as tfa, PIL.Image, numpy as np, math

# Input TFRecord file; read by calc_labels() and img_gen().
c_inp_fname = 'max_32_set.tfrecords'
# Output TFRecord file that main() writes the augmented records to.
c_out_fname = 'max_32_set.augmented.tfrecords'
# Augmentation kinds that augment() is allowed to produce.
c_augment_types = ('noise', 'blur', 'noise_blur', 'noise_blur_mirror')
# Target number of records per label; process() asserts that it is not
# smaller than the largest class in the input.
c_res_class_size = None # If None then auto configured to maximal class size

def calc_labels():
    """Scan the input TFRecord file and collect the label of every record.

    Returns:
        A pair ``(cnts, labels)``: ``cnts`` maps each label to the number of
        records carrying it, and ``labels`` lists the label of every record
        in file order.
    """
    labels = []
    for raw_record in tf.data.TFRecordDataset(c_inp_fname):
        example = tf.train.Example.FromString(raw_record.numpy())
        labels.append(example.features.feature['label'].int64_list.value[0])

    cnts = {}
    for label in labels:
        cnts[label] = cnts.get(label, 0) + 1
    return cnts, labels

def img_gen():
    """Yield every record of the input TFRecord file together with its image.

    Yields:
        ``(example, img_arr)`` where ``example`` is the parsed
        ``tf.train.Example`` and ``img_arr`` is a uint8 array built from the
        record's ``image_raw`` bytes, reshaped to ``(height, width, depth)``.
    """
    for raw_record in tf.data.TFRecordDataset(c_inp_fname):
        example = tf.train.Example.FromString(raw_record.numpy())
        # Snapshot the feature map so fields can be looked up by name.
        feats = dict(example.features.feature.items())
        pixels = np.frombuffer(feats['image_raw'].bytes_list.value[0], dtype=np.uint8)
        dims = [feats[name].int64_list.value[0] for name in ('height', 'width', 'depth')]
        yield example, pixels.reshape(dims)
        
def gaussian_noise(inp, stddev):
    """Return ``inp`` with zero-mean Gaussian noise of the given ``stddev`` added.

    The noise is drawn by ``tf.random.normal`` with the shape and dtype of
    ``inp``.
    """
    return inp + tf.random.normal(
        shape=tf.shape(inp),
        mean=0.0,
        stddev=stddev,
        dtype=inp.dtype,
    )
        
def augment(a, cnt):
    """Yield ``cnt`` labelled variants of image ``a``: the original plus augmentations.

    The first item is always ``('orig', a)``. The remaining ``cnt - 1`` items
    are taken from the kinds enabled in ``c_augment_types`` (``'noise'``,
    ``'blur'``, ``'noise_blur'``, ``'noise_blur_mirror'``), with the noise
    stddev spread evenly inside (5, 20) and the blur sigma inside (1, 5).

    Args:
        a: Image array; it is padded and cropped along its first two axes and
            left untouched along the third.
        cnt: Total number of items to yield, at least 1.

    Yields:
        ``(kind, image)`` tuples; augmented images are uint8 arrays with the
        same height and width as ``a``.
    """
    min_noise_stddev, max_noise_stddev = 5., 20.
    blur_kern, min_blur_stddev, max_blur_stddev = 3, 1., 5.

    assert cnt >= 1

    def next_pow2(n):
        # Exact integer arithmetic: ceil(log(n) / log(2)) may overshoot by one
        # for exact powers of two because of floating-point rounding.
        return 1 << (n - 1).bit_length()

    def pad_a(x):
        # Zero-pad the end of the first two axes up to the next power of two.
        return np.pad(x, (
            (0, next_pow2(x.shape[0]) - x.shape[0]),
            (0, next_pow2(x.shape[1]) - x.shape[1]),
            (0, 0)), constant_values=0)

    def post_a(x):
        # Crop back to the size of `a`, clamp to [0, 255] and convert to uint8.
        return np.clip(x[:a.shape[0], :a.shape[1]], 0, 255).astype(np.uint8)

    def linsp(l, r, c):
        # `c` evenly spaced points strictly between `l` and `r`.
        return [(l + (i + 1) * (r - l) / (c + 1)) for i in range(c)]

    yield 'orig', a
    cnt -= 1
    res = []
    # Number of strength levels needed so that all enabled kinds together
    # produce at least `cnt` augmented images.
    fcnt = math.ceil(cnt / len(c_augment_types))
    for noise_stddev, blur_stddev in zip(linsp(min_noise_stddev, max_noise_stddev, fcnt), linsp(min_blur_stddev, max_blur_stddev, fcnt)):
        if 'noise' in c_augment_types:
            res.append(('noise', post_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy())))
        if 'blur' in c_augment_types:
            res.append(('blur', post_a(tfa.image.gaussian_filter2d(pad_a(a).astype(np.float32), filter_shape = blur_kern, sigma = blur_stddev).numpy())))
        if 'noise_blur' in c_augment_types or 'noise_blur_mirror' in c_augment_types:
            # Noise followed by blur; shared by the plain and mirrored kinds.
            nbr = post_a(tfa.image.gaussian_filter2d(
                pad_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy()),
                filter_shape = blur_kern, sigma = blur_stddev).numpy())
            if 'noise_blur' in c_augment_types:
                res.append(('noise_blur', nbr))
            if 'noise_blur_mirror' in c_augment_types:
                res.append(('noise_blur_mirror', tf.image.flip_left_right(nbr).numpy().astype(np.uint8)))
    assert cnt <= len(res) <= cnt + len(c_augment_types), (cnt, len(res), len(c_augment_types))
    # The last strength level may produce more images than requested.
    yield from res[:cnt]

def process():
    """Generate the serialized records of the class-balanced, augmented dataset.

    Each input image is expanded through ``augment`` into as many variants as
    its label needs, so that ``class_size`` variants are requested per label
    in total (the largest class size unless ``c_res_class_size`` is set).

    Yields:
        Serialized ``tf.train.Example`` bytes: a copy of the source record
        whose ``image_raw`` holds the bytes of one variant.
    """
    labels_cnts, _ = calc_labels()
    max_class_size = max(labels_cnts.values())
    if c_res_class_size is None:
        class_size = max_class_size
    else:
        assert max_class_size <= c_res_class_size, f'Maximal class size is {max_class_size}, while requested res class size is smaller, {c_res_class_size}!'
        class_size = c_res_class_size

    seen = {}
    for iimg, (proto, imga) in enumerate(img_gen()):
        label = proto.features.feature['label'].int64_list.value[0]
        seen[label] = seen.get(label, 0) + 1
        # Split class_size over the label's images: every image gets the
        # quotient, and the first `extra` images of the label get one more.
        base, extra = divmod(class_size, labels_cnts[label])
        need_cnt = base + int(seen[label] <= extra)
        for _, aug in augment(imga, need_cnt):
            # Round-trip through the wire format to get an independent copy.
            protoc = type(proto).FromString(proto.SerializeToString())
            protoc.features.feature['image_raw'].bytes_list.value[0] = aug.tobytes()
            yield protoc.SerializeToString()
        # Progress indicator: print the index of every tenth image.
        if iimg % 10 == 0:
            print(iimg, ' ', sep = '', end = '', flush = True)
            
def main():
    """Write every record produced by ``process`` to ``c_out_fname``."""
    assert tf.executing_eagerly()
    writer = tf.data.experimental.TFRecordWriter(c_out_fname)
    augmented = tf.data.TFRecordDataset.from_generator(process, tf.string)
    writer.write(augmented)

main()

您是想查看.tfrecords中的图像吗？——没错，我想从文件中打印一到两个图像 @AkashDesai。——@lko-SOF 你的.tfrecord文件是保密的吗？你能分享其中的一小部分或者全部吗？这样我们就可以尝试编写一个脚本，从中提取图像。——Hello @Arty，请在以下链接中查找上传的数据集。——Hello @Arty，如果图像位于5个文件夹中，您能否演示如何执行相同的操作？第i个文件夹包含第i类的图像。需要注意的是，在这种情况下，图像是.jpg格式。——@Iko-SOF 如您所见，目前我使用

tf.data.TFRecordDataset

作为输入和输出。对于你目前的情况，你想要什么？据我所知，您不再有TFRecordDataset，而只有包含图像的文件夹。关于输出，您希望它采用哪种格式？另外，带有图像的文件夹或输出应为TFRecordDataset？感谢您在@Arty的快速响应，输出将再次以.jpg格式（因此不再使用tfrecord dataset），我认为最好在单独的问题中进行，无需干扰您的回答。你能在下面的链接中找到新的问题吗？@Iko SOF我想我会在这里扩展我目前的答案，采用你的文件夹案例，因为大多数代码都是相同的，我不认为StackOverflow通过做一些小的修改将几乎整个代码从一个答案复制到另一个答案是好的。StackOverflow倾向于给出一个丰富的答案，并将其他问题标记为重复。

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf, tensorflow_addons as tfa, PIL.Image, numpy as np, math, matplotlib.pyplot as plt

# Input root directory; calc_labels() and img_gen() parse the name of each of
# its entries as an integer label and read the files inside it.
c_inp_dir = './images/'
# Output root directory; process() creates one sub-directory per label in it.
c_out_dir = './images_out/'
# Augmentation kinds that augment() is allowed to produce.
c_augment_types = ('noise', 'blur', 'noise_blur', 'noise_blur_mirror')
# Target number of images per label; process() asserts that it is not
# smaller than the largest class in the input.
c_res_class_size = None # If None then auto configured to maximal class size

def calc_labels(dirn = None):
    """Count the entries stored under each label sub-directory of ``dirn``.

    The name of every entry of ``dirn`` is parsed as an integer label.

    Args:
        dirn: Root directory to scan; ``c_inp_dir`` is used when ``None``.

    Returns:
        A pair ``(cnts, labels)``: ``cnts`` maps each integer label to the
        number of entries in its sub-directory, and ``labels`` lists the
        labels in the sorted order of the sub-directory names.

    Raises:
        ValueError: If an entry name of ``dirn`` is not an integer.
    """
    if dirn is None:
        dirn = c_inp_dir
    cnts, labels = {}, []
    for name in sorted(os.listdir(dirn)):
        label = int(name)
        labels.append(label)
        # List the directory by its real name: rebuilding the path from the
        # parsed integer would miss names such as '01'.
        cnts[label] = len(os.listdir(os.path.join(dirn, name)))
    return cnts, labels

def img_gen():
    """Yield every image found under the label sub-directories of ``c_inp_dir``.

    Sub-directories and the files inside them are visited in sorted name
    order; each sub-directory name is parsed as an integer label.

    Yields:
        ``(label, img_arr, fname)``: the integer label, the image loaded as a
        numpy array, and the image's file name.
    """
    for dname in sorted(os.listdir(c_inp_dir)):
        label = int(dname)
        # Use the real directory name for the path: rebuilding it from the
        # parsed integer would miss names such as '01'.
        label_dir = os.path.join(c_inp_dir, dname)
        for fname in sorted(os.listdir(label_dir)):
            # Close the image file as soon as its pixels are copied out.
            with PIL.Image.open(os.path.join(label_dir, fname)) as img:
                img_arr = np.array(img)
            yield label, img_arr, fname
        
def gaussian_noise(inp, stddev):
    """Return ``inp`` with zero-mean Gaussian noise of the given ``stddev`` added.

    The noise is drawn by ``tf.random.normal`` with the shape and dtype of
    ``inp``.
    """
    return inp + tf.random.normal(
        shape=tf.shape(inp),
        mean=0.0,
        stddev=stddev,
        dtype=inp.dtype,
    )
        
def augment(a, cnt):
    """Yield ``cnt`` labelled variants of image ``a``: the original plus augmentations.

    The first item is always ``('orig', a)``. The remaining ``cnt - 1`` items
    are taken from the kinds enabled in ``c_augment_types`` (``'noise'``,
    ``'blur'``, ``'noise_blur'``, ``'noise_blur_mirror'``), with the noise
    stddev spread evenly inside (5, 20) and the blur sigma inside (1, 5).

    Args:
        a: Image array; it is padded and cropped along its first two axes and
            left untouched along the third.
        cnt: Total number of items to yield, at least 1.

    Yields:
        ``(kind, image)`` tuples; augmented images are uint8 arrays with the
        same height and width as ``a``.
    """
    min_noise_stddev, max_noise_stddev = 5., 20.
    blur_kern, min_blur_stddev, max_blur_stddev = 3, 1., 5.

    assert cnt >= 1

    def next_pow2(n):
        # Exact integer arithmetic: ceil(log(n) / log(2)) may overshoot by one
        # for exact powers of two because of floating-point rounding.
        return 1 << (n - 1).bit_length()

    def pad_a(x):
        # Zero-pad the end of the first two axes up to the next power of two.
        return np.pad(x, (
            (0, next_pow2(x.shape[0]) - x.shape[0]),
            (0, next_pow2(x.shape[1]) - x.shape[1]),
            (0, 0)), constant_values=0)

    def post_a(x):
        # Crop back to the size of `a`, clamp to [0, 255] and convert to uint8.
        return np.clip(x[:a.shape[0], :a.shape[1]], 0, 255).astype(np.uint8)

    def linsp(l, r, c):
        # `c` evenly spaced points strictly between `l` and `r`.
        return [(l + (i + 1) * (r - l) / (c + 1)) for i in range(c)]

    yield 'orig', a
    cnt -= 1
    res = []
    # Number of strength levels needed so that all enabled kinds together
    # produce at least `cnt` augmented images.
    fcnt = math.ceil(cnt / len(c_augment_types))
    for noise_stddev, blur_stddev in zip(linsp(min_noise_stddev, max_noise_stddev, fcnt), linsp(min_blur_stddev, max_blur_stddev, fcnt)):
        if 'noise' in c_augment_types:
            res.append(('noise', post_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy())))
        if 'blur' in c_augment_types:
            res.append(('blur', post_a(tfa.image.gaussian_filter2d(pad_a(a).astype(np.float32), filter_shape = blur_kern, sigma = blur_stddev).numpy())))
        if 'noise_blur' in c_augment_types or 'noise_blur_mirror' in c_augment_types:
            # Noise followed by blur; shared by the plain and mirrored kinds.
            nbr = post_a(tfa.image.gaussian_filter2d(
                pad_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy()),
                filter_shape = blur_kern, sigma = blur_stddev).numpy())
            if 'noise_blur' in c_augment_types:
                res.append(('noise_blur', nbr))
            if 'noise_blur_mirror' in c_augment_types:
                res.append(('noise_blur_mirror', tf.image.flip_left_right(nbr).numpy().astype(np.uint8)))
    assert cnt <= len(res) <= cnt + len(c_augment_types), (cnt, len(res), len(c_augment_types))
    # The last strength level may produce more images than requested.
    yield from res[:cnt]

def process():
    """Write a class-balanced, augmented copy of the input images to ``c_out_dir``.

    Each input image is expanded through ``augment`` into as many variants as
    its label needs, so that ``class_size`` variants are requested per label
    in total (the largest class size unless ``c_res_class_size`` is set).
    Every variant is saved as a .png inside the label's output sub-directory.
    """
    labels_cnts, _ = calc_labels()
    max_class_size = max(labels_cnts.values())
    if c_res_class_size is None:
        class_size = max_class_size
    else:
        assert max_class_size <= c_res_class_size, f'Maximal class size is {max_class_size}, while requested res class size is smaller, {c_res_class_size}!'
        class_size = c_res_class_size

    seen = {}
    for iimg, (label, imga, fname) in enumerate(img_gen()):
        os.makedirs(f'{c_out_dir}/{label}/', exist_ok = True)
        seen[label] = seen.get(label, 0) + 1
        # Split class_size over the label's images: every image gets the
        # quotient, and the first `extra` images of the label get one more.
        base, extra = divmod(class_size, labels_cnts[label])
        need_cnt = base + int(seen[label] <= extra)
        for iaug, (taug, aug) in enumerate(augment(imga, need_cnt)):
            out_img = PIL.Image.fromarray(aug)
            out_img.save(f'{c_out_dir}/{label}/{fname}.{iaug}_{taug}.png')
        # Progress indicator: print the index of every tenth image.
        if iimg % 10 == 0:
            print(iimg, ' ', sep = '', end = '', flush = True)
            
def plot_cnts(dirn):
    """Show a line plot of the per-label counts that ``calc_labels`` reports for ``dirn``."""
    labels_cnts, _ = calc_labels(dirn)
    # Labels are unique dict keys, so sorting the pairs orders them by label.
    x, y = zip(*sorted(labels_cnts.items()))
    plt.plot(x, y)
    plt.xlabel('label')
    plt.ylabel('num images')
    plt.xticks(x)
    plt.show()
            
def main():
    """Augment the input folders, then plot the class sizes of input and output."""
    process()
    for dirn in (c_inp_dir, c_out_dir):
        plot_cnts(dirn)

main()