Python:读取包含以NumPy数组形式存储的图像的HDF5文件,最高效的方法是什么?
标签:python, image, opencv, python-imaging-library, hdf5

我正在将图像文件转换为HDF5文件,如下所示:
import h5py
import io
import os
import cv2
import numpy as np
from PIL import Image
def convertJpgtoH5(input_dir, filename, output_dir):
    """Copy the raw, still-encoded bytes of one image file into an HDF5 file.

    The image is not decoded: the file is read in binary mode and its bytes
    are written to a 1-D uint8 dataset named 'image' inside
    ``<output_dir>/<filename without extension>.hdf5``. The size of the
    source file and of the resulting HDF5 file are printed.

    Args:
        input_dir: Directory that contains the image file.
        filename: Name of the image file inside ``input_dir``.
        output_dir: Existing directory in which the HDF5 file is created.

    Raises:
        OSError: If the source file cannot be read.
    """
    filepath = os.path.join(input_dir, filename)
    print('image size: %d bytes'%os.path.getsize(filepath))

    # Context manager so the handle is released even if read() fails.
    with open(filepath, 'rb') as img_f:
        binary_data = img_f.read()

    # Store the blob as plain uint8 values rather than as a NumPy byte
    # string: h5py advises against pushing binary data through its string
    # handling. A uint8 array still exposes the bytes through the buffer
    # protocol, so it can be handed to io.BytesIO unchanged.
    binary_data_np = np.frombuffer(binary_data, dtype=np.uint8)

    # splitext copes with extensions of any length ('.jpeg', '.tiff'),
    # which a fixed filename[:-4] slice does not.
    stem = os.path.splitext(filename)[0]
    new_filepath = os.path.join(output_dir, stem + '.hdf5')

    with h5py.File(new_filepath, 'w') as f:
        f.create_dataset('image', data=binary_data_np)

    print('hdf5 file size: %d bytes'%os.path.getsize(new_filepath))
pathImg = '/path/to/images'
pathH5 = '/path/to/hdf5/files'
ext = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]

# Step 1: write one HDF5 file per image found directly in pathImg.
image_suffixes = tuple(ext)  # str.endswith needs a tuple; build it once
for img in os.listdir(pathImg):
    if img.endswith(image_suffixes):
        convertJpgtoH5(pathImg, img, pathH5)

# Step 2: read every HDF5 file back and decode the stored image bytes.
for h5name in os.listdir(pathH5):
    if h5name.endswith(".hdf5"):
        # Keep the file name and the file handle in separate variables, and
        # let the context manager close the file even if reading fails.
        with h5py.File(f"{pathH5}/{h5name}", "r") as hf:
            key = list(hf.keys())[0]  # first dataset in the file
            data = np.array(hf[key])  # copied into memory before close
        img = Image.open(io.BytesIO(data))
        # COLOR_BGR2RGB swaps the first and third channels; applied to the
        # Pillow image this presumably produces BGR order for OpenCV --
        # TODO confirm against the image mode.
        image = cv2.cvtColor(np.float32(img), cv2.COLOR_BGR2RGB)
之后我按如下方式读取这些HDF5文件:
import h5py
import io
import os
import cv2
import numpy as np
from PIL import Image
def convertJpgtoH5(input_dir, filename, output_dir):
    """Copy the raw, still-encoded bytes of one image file into an HDF5 file.

    The image is not decoded: the file is read in binary mode and its bytes
    are written to a 1-D uint8 dataset named 'image' inside
    ``<output_dir>/<filename without extension>.hdf5``. The size of the
    source file and of the resulting HDF5 file are printed.

    Args:
        input_dir: Directory that contains the image file.
        filename: Name of the image file inside ``input_dir``.
        output_dir: Existing directory in which the HDF5 file is created.

    Raises:
        OSError: If the source file cannot be read.
    """
    filepath = os.path.join(input_dir, filename)
    print('image size: %d bytes'%os.path.getsize(filepath))

    # Context manager so the handle is released even if read() fails.
    with open(filepath, 'rb') as img_f:
        binary_data = img_f.read()

    # Store the blob as plain uint8 values rather than as a NumPy byte
    # string: h5py advises against pushing binary data through its string
    # handling. A uint8 array still exposes the bytes through the buffer
    # protocol, so it can be handed to io.BytesIO unchanged.
    binary_data_np = np.frombuffer(binary_data, dtype=np.uint8)

    # splitext copes with extensions of any length ('.jpeg', '.tiff'),
    # which a fixed filename[:-4] slice does not.
    stem = os.path.splitext(filename)[0]
    new_filepath = os.path.join(output_dir, stem + '.hdf5')

    with h5py.File(new_filepath, 'w') as f:
        f.create_dataset('image', data=binary_data_np)

    print('hdf5 file size: %d bytes'%os.path.getsize(new_filepath))
pathImg = '/path/to/images'
pathH5 = '/path/to/hdf5/files'
ext = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]

# Step 1: write one HDF5 file per image found directly in pathImg.
image_suffixes = tuple(ext)  # str.endswith needs a tuple; build it once
for img in os.listdir(pathImg):
    if img.endswith(image_suffixes):
        convertJpgtoH5(pathImg, img, pathH5)

# Step 2: read every HDF5 file back and decode the stored image bytes.
for h5name in os.listdir(pathH5):
    if h5name.endswith(".hdf5"):
        # Keep the file name and the file handle in separate variables, and
        # let the context manager close the file even if reading fails.
        with h5py.File(f"{pathH5}/{h5name}", "r") as hf:
            key = list(hf.keys())[0]  # first dataset in the file
            data = np.array(hf[key])  # copied into memory before close
        img = Image.open(io.BytesIO(data))
        # COLOR_BGR2RGB swaps the first and third channels; applied to the
        # Pillow image this presumably produces BGR order for OpenCV --
        # TODO confirm against the image mode.
        image = cv2.cvtColor(np.float32(img), cv2.COLOR_BGR2RGB)
有没有一种更高效的方法来读取HDF5文件,而不必先转换成NumPy数组、再用Pillow打开、最后才交给OpenCV使用?理想情况下,这个问题应该作为重复问题关闭,因为我在上面评论中引用的答案已经解释了您想做的大部分事情。我在这里列出这些链接:
import h5py
import glob
import os
import cv2
import numpy as np
def convertImagetoH5(imgfilename):
    """Load one image file and return it as a resized RGB array.

    Prints the size of the file on disk, decodes it with OpenCV as a
    3-channel image, converts the channel order from BGR to RGB and resizes
    the result to the module-level IMG_WIDTH x IMG_HEIGHT.

    Args:
        imgfilename: Path of the image file to load.

    Returns:
        The resized image array; cv2.resize is given (IMG_WIDTH, IMG_HEIGHT)
        as the target size, so the array has IMG_HEIGHT rows and IMG_WIDTH
        columns.

    Raises:
        OSError: If the file does not exist (from os.path.getsize).
        ValueError: If OpenCV cannot decode the file.
    """
    print('image size: %d bytes'%os.path.getsize(imgfilename))

    # The second argument of cv2.imread is a read-mode flag, not a colour
    # conversion code. Read as 3-channel BGR and convert to RGB explicitly.
    img = cv2.imread(imgfilename, cv2.IMREAD_COLOR)
    if img is None:
        # imread signals failure by returning None instead of raising.
        raise ValueError('could not read image: %s' % imgfilename)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    return cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
pathImg = '/path/to/images'
pathH5 = '/path/to/hdf5file'
ext_list = [".ppm", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]
IMG_WIDTH = 120
IMG_HEIGHT = 120

# Get the list of all images directly inside pathImg and count them.
# (The pattern contains no '**', so glob's recursive flag would do nothing.)
all_images = []
for ext in ext_list:
    all_images.extend(glob.glob(pathImg+"/*"+ext))
n_images = len(all_images)

# One array holds every image so HDF5 is written with a single call.
# cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT)) yields IMG_HEIGHT rows by
# IMG_WIDTH columns, so height comes before width in the array shape.
ds_img_arr = np.zeros((n_images, IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.uint8)
for cnt, img in enumerate(all_images):
    img_arr = convertImagetoH5(img)
    ds_img_arr[cnt] = img_arr

# Write all images as one dataset named 'images'.
h5_filepath = pathH5 + '/all_image_data.hdf5'
with h5py.File(h5_filepath, 'w') as h5f:
    dset = h5f.create_dataset('images', data=ds_img_arr)
print('hdf5 file size: %d bytes'%os.path.getsize(h5_filepath))

# Re-open the file read-only and report the shape and dtype of the dataset.
with h5py.File(h5_filepath, "r") as h5r:
    key = list(h5r.keys())[0]
    print (key, h5r[key].shape, h5r[key].dtype)
如果您确实希望每张图像对应一个HDF5文件,下面的代码是对您问题中代码的更新。同样,只使用cv2——不需要PIL。图像不会被调整大小。这只是为了完整性(演示流程),并不是管理图像数据的推荐方式。
import h5py
import os
import cv2
import numpy as np
def convertImagetoH5(input_dir, filename, output_dir):
    """Decode one image file and store its pixel array in its own HDF5 file.

    The image is read with OpenCV as a 3-channel image, converted from BGR
    to RGB channel order and written, without resizing, to a dataset named
    'image' inside ``<output_dir>/<filename without extension>.hdf5``. The
    size of the source file and of the resulting HDF5 file are printed.

    Args:
        input_dir: Directory that contains the image file.
        filename: Name of the image file inside ``input_dir``.
        output_dir: Existing directory in which the HDF5 file is created.

    Raises:
        OSError: If the source file does not exist (from os.path.getsize).
        ValueError: If OpenCV cannot decode the file.
    """
    filepath = os.path.join(input_dir, filename)
    print('image size: %d bytes'%os.path.getsize(filepath))

    # The second argument of cv2.imread is a read-mode flag, not a colour
    # conversion code. Read as 3-channel BGR and convert to RGB explicitly.
    img = cv2.imread(filepath, cv2.IMREAD_COLOR)
    if img is None:
        # imread signals failure by returning None instead of raising.
        raise ValueError('could not read image: %s' % filepath)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # splitext copes with extensions of any length ('.jpeg', '.tiff'),
    # which a fixed filename[:-4] slice does not.
    stem = os.path.splitext(filename)[0]
    new_filepath = os.path.join(output_dir, stem + '.hdf5')

    with h5py.File(new_filepath, 'w') as h5f:
        h5f.create_dataset('image', data=img)

    print('hdf5 file size: %d bytes'%os.path.getsize(new_filepath))
pathImg = '/path/to/images'
pathH5 = '/path/to/hdf5file'
ext = [".ppm", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]

# Loop through the image files and create a matching HDF5 file for each.
image_suffixes = tuple(ext)  # str.endswith needs a tuple; build it once
for img in os.listdir(pathImg):
    if img.endswith(image_suffixes):
        convertImagetoH5(pathImg, img, pathH5)

# Loop through the HDF5 files and read each image dataset as an array.
for h5name in os.listdir(pathH5):
    if h5name.endswith(".hdf5"):
        # A `with` statement binds its target through `as` only; the
        # original `with h5f = ... as h5f:` form is a SyntaxError.
        with h5py.File(f"{pathH5}/{h5name}", "r") as h5f:
            key = list(h5f.keys())[0]  # first dataset in the file
            image = h5f[key][:]        # slice to read the whole dataset
        print(f'{h5name}: {image.shape}, {image.dtype}')
你说的“高效”是什么意思?是要最小化所需的磁盘空间,还是缩短读取文件的时间,或者减少库依赖项的数量?如果我没有理解错您的代码,您是在为每张图像创建一个HDF5文件,对吗?如果是这样,您会发现使用HDF5时,写入调用的次数比写入数据的大小更重要。因此,先读取所有图像并转换为NumPy数组,把每张图像放入一个更大的数组(大小足以容纳所有图像),待所有图像读取并转换完毕后,再将该数组作为单个数据集写入HDF5,这样会更快。2个例子:1)和2)还有,为什么要同时使用Pillow和OpenCV?任何一个都足够了,不需要两者都用。你有没有检查过读取H5文件的那段代码?我在
hf = h5py.File(f"data/{hf}", "r")
上得到一个错误。它应该是:hf = h5py.File(f"{pathH5}/{hf}", "r")
@MarkSetchell对我来说最重要的因素是读取时间。