Tensorflow 在COCO数据集上为所选类别ID创建tf_记录
我有完整的 COCO 2017 数据集(train、test、val),想从中创建 tf_record 文件。但并非所有类别都需要,只需要某些选定的类别。因此我有一个 label_map,其中只包含我希望写入 tf_record 的类别,格式如下:
item {
name: "/m/01g317"
id: 1
display_name: "person"
}
item {
name: "/m/0199g"
id: 2
display_name: "bicycle"
}
为所有类创建完整tf_记录的标准脚本如下:
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts one COCO image and its annotations to a tf.Example proto.

    Annotations whose ``category_id`` is not present in ``category_index``
    are silently skipped (and counted in ``num_annotations_skipped``).  This
    makes the function usable with a *reduced* label map that contains only
    selected classes: the original code raised ``KeyError`` for any
    annotation of a class outside the label map.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
         u'date_captured', u'flickr_url', u'id']
      annotations_list:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
         u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner. This function converts
        to the format expected by the Tensorflow Object Detection API (which
        is [ymin, xmin, ymax, xmax] with coordinates normalized relative
        to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category. See the
        label_map_util.create_category_index function. May contain only a
        subset of the categories present in the annotations; annotations of
        other categories are skipped.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.

    Returns:
      key: SHA256 hex digest of the encoded image bytes.
      example: The converted tf.Example.
      num_annotations_skipped: Number of (invalid or filtered-out)
        annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a
        valid JPEG.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    # The digest doubles as a stable per-image key in the output record.
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Degenerate boxes are invalid annotations.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        # Boxes extending past the image boundary are invalid annotations.
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        # Skip annotations for classes not in the (possibly reduced) label
        # map.  This check MUST precede every append below so that the
        # parallel per-object feature lists stay aligned.
        if category_id not in category_index:
            num_annotations_skipped += 1
            continue
        # Normalize [x, y, w, h] absolute coords to [0, 1] relative coords.
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                               image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            # For non-crowd (polygon) annotations, decode() returns one mask
            # per polygon; merge them into a single binary mask.
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    feature_dict = {
        'image/height':
            dataset_util.int64_feature(image_height),
        'image/width':
            dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
        'image/object/area':
            dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped
def _create_tf_record_from_coco_annotations(
        annotations_file, image_dir, output_path, include_masks):
    """Loads COCO annotation json files and converts to tf.Record format.

    Args:
      annotations_file: JSON file containing bounding box annotations.
      image_dir: Directory containing the image files.
      output_path: Path to output tf.Record file.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
    """
    with tf.gfile.GFile(annotations_file, 'r') as fid:
        groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    # Group annotations by image id for O(1) lookup in the write loop.
    annotations_index = {}
    if 'annotations' in groundtruth_data:
        tf.logging.info(
            'Found groundtruth annotations. Building annotations index.')
        for annotation in groundtruth_data['annotations']:
            image_id = annotation['image_id']
            annotations_index.setdefault(image_id, []).append(annotation)

    # Images without annotations still get an (empty) entry so that every
    # image produces a tf.Example.
    missing_annotation_count = 0
    for image in images:
        image_id = image['id']
        if image_id not in annotations_index:
            missing_annotation_count += 1
            annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    tf.logging.info('writing to output path: %s', output_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    total_num_annotations_skipped = 0
    try:
        for idx, image in enumerate(images):
            if idx % 100 == 0:
                tf.logging.info('On image %d of %d', idx, len(images))
            annotations_list = annotations_index[image['id']]
            _, tf_example, num_annotations_skipped = create_tf_example(
                image, annotations_list, image_dir, category_index, include_masks)
            total_num_annotations_skipped += num_annotations_skipped
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even if create_tf_example raises (e.g. a corrupt
        # JPEG), so the TFRecord file handle is never leaked.
        writer.close()
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
def create_tf_示例(图,
注释和列表,
图片室主任,
类别索引,
包括_masks=False):
“”“将图像和批注转换为tf。示例原型。
Args:
图:带按键的dict:
[u'license',u'file_name',u'coco_url',u'height',u'width',
u'date\u captured',u'flickr\u url',u'id']
注释列表:
带键的DICT列表:
[u'分割'、u'区域'、u'iscrowd'、u'image\u id',
u'bbox',u'category_id',u'id']
请注意,官方COCO数据集中的边界框坐标是
使用绝对坐标给出[x,y,width,height]元组,其中
x、 y表示左上角(0索引)。此函数用于转换
为Tensorflow对象检测API(即
它是[ymin,xmin,ymax,xmax],坐标是标准化的相对坐标
图像大小)。
image\u dir:包含图像文件的目录。
分类索引:包含COCO分类信息的目录
通过每个类别的“id”字段。请参阅
label\u map\u util.create\u category\u index函数。
包含\ U遮罩:是否包含实例分段遮罩
(PNG编码)在结果中。默认值:False。
返回:
示例:转换后的tf.example
num_annotations_skipped:被忽略的(无效)批注数。
提出:
ValueError:如果数据['filename']指向的图像不是有效的JPEG
"""
图像高度=图像['height']
图像宽度=图像['width']
filename=image['file\u name']
image\u id=image['id']
完整路径=os.path.join(图像目录,文件名)
将tf.gfile.gfile(完整路径,'rb')作为fid:
encoded_jpg=fid.read()
encoded_jpg_io=io.BytesIO(encoded_jpg)
image=PIL.image.open(编码的\u jpg\u io)
key=hashlib.sha256(encoded_jpg).hexdigest()
xmin=[]
xmax=[]
ymin=[]
ymax=[]
is_crowd=[]
类别名称=[]
类别_id=[]
面积=[]
编码的_掩码_png=[]
num\u注释\u跳过=0
对于注释列表中的对象注释:
(x,y,width,height)=元组(object_注释['bbox'])
如果图像宽度大于高度:
跳过的注释数+=1
持续
xmin.append(浮点(x)/图像宽度)
xmax.append(浮点(x+宽度)/图像宽度)
ymin.append(浮动(y)/图像高度)
ymax.append(浮点(y+高度)/图像\高度)
is_crowd.append(对象_注释['iscrowd'])
category\u id=int(对象注释['category\u id'])
类别id.append(类别id)
category_names.append(category_索引[category_id]['name'].encode('utf8'))
area.append(对象_注释['area'])
如果包括(u)遮罩:
run_len_encoding=mask.frPyObjects(对象注释['segmentation'],
图像(高度、图像宽度)
二进制掩码=掩码.解码(运行二进制编码)
如果不是对象注释['iscrowd']:
二进制屏蔽=np.amax(二进制屏蔽,轴=2)
pil\u image=pil.image.fromarray(二进制掩码)
输出io=io.BytesIO()
pil_image.save(输出io,格式='PNG')
编码的\u掩码\u png.append(输出\u io.getvalue())
特征参数={
“图像/高度”:
dataset_util.int64_功能(图像高度),
“图像/宽度”:
dataset_util.int64_功能(图像宽度),
“图像/文件名”:
dataset_util.bytes_功能(filename.encode('utf8')),
“图像/源_id”:
dataset_util.bytes_功能(str(image_id).encode('utf8')),
“image/key/sha256”:
dataset_util.bytes_功能(key.encode('utf8')),
“图像/编码”:
dataset_util.bytes_功能(编码的_jpg),
“图像/格式”:
dataset_util.bytes_功能('jpeg'。编码('utf8'),
'image/object/bbox/xmin':
dataset_util.float_list_功能(xmin),
“image/object/bbox/xmax”:
dataset_util.float_list_功能(xmax),
'image/object/bbox/ymin':
dataset_util.float_list_功能(ymin),
'image/object/bbox/ymax':
数据集\u util.float\u列表\u功能(ymax),
“图像/对象/类/标签”:
数据集\u util.int64\u列表\u功能(类别\u ID),
“图像/对象/是人群”:
dataset_util.int64_列表_功能(is_群组),
“图像/对象/区域”:
数据集\u util.float\u列表\u功能(区域),
}
如果包括(u)遮罩:
特征[图像/对象/遮罩]=(
数据集\使用字节\列表\功能(编码\掩码\ png))
示例=tf.train.example(特征=tf.train.features(特征=feature_dict))
返回键,例如,已跳过num\u注释
定义(def)根据(coco)注释创建(tf)记录(record)(
注释\u文件、图像\u目录、输出\u路径,包括\u掩码):
“”“加载COCO注释json文件并转换为tf.Record格式。
Args:
annotations\u文件:包含边界框注释的JSON文件。
image\u dir:包含图像文件的目录。
输出路径:输出tf.Record文件的路径。
包含\ U遮罩:是否包含实例分段遮罩
(PNG编码)在结果中。默认值:False。
"""
将tf.gfile.gfile(注释文件'r')作为fid:
groundtruth_data=json.load(fid)
图像=地面真实数据['images']
category\u index=label\u map\u util.create\u category\u index(
地面真实数据[‘类别’])
注释\u索引={}
如果groundtruth_数据中有“注释”:
tf.logging.info(
“找到了groundtruth批注。正在构建批注索引。”)
对于groundtruth_数据中的注释['annotations']:
图像\u id=annotat