Python 从PASCAL VOC数据集中重新创建具有少量类的注释文件
我正在使用 PASCAL VOC 2012 目标检测数据集,想创建新的注释(annotation)文件和新的 JPEG 图像文件夹,其中只包含我需要的某些类别。我已经能够生成新的 JPEG 图像文件夹,但不知道如何只用选定的类别重新创建新的注释文件。任何关于如何进行的想法都将不胜感激。

标签:python, file, machine-learning, deep-learning, computer-vision
#get the path to each image, its size and annotations
def get_xml_contents(name):
contents = etree.parse(name)
objects = contents.findall('./object')
size = (int(float(contents.find('.//width').text)), int(float(contents.find('.//height').text)))
file_name = contents.find('./filename').text
d= [{item.find('name').text: [int(float(item.find('bndbox/xmin').text)),
int(float(item.find('bndbox/ymin').text)),
int(float(item.find('bndbox/xmax').text)),
int(float(item.find('bndbox/ymax').text))]}
for item in objects]
return {'size': size, 'filename': file_name, 'objects': d}
# Script: pick the annotations that contain exactly one object of a preferred
# class, tabulate them, and copy the matching images into a separate folder.
annotations = []

# Preferred classes: only objects carrying one of these labels are considered.
prefered_classes = ['car', 'chair', 'bird', 'aeroplane']
indextoclasses = {k: v for k, v in enumerate(prefered_classes)}
classestoindex = {v: k for k, v in indextoclasses.items()}

# Parse the annotation files and collect the selected ones as dictionaries.
annot_path = "C:\\Users\\Desktop\\pascal\\Annotations"
for filename in sorted(os.listdir(annot_path)):
    # Anything that is not an XML file cannot be parsed as an annotation.
    if not filename.lower().endswith('.xml'):
        continue
    annotation = get_xml_contents(os.path.join(annot_path, filename))
    # Each entry is a single-key dict {label: bbox}; keep preferred labels only.
    objects = [
        obj for obj in annotation['objects']
        if next(iter(obj)) in prefered_classes
    ]
    # Keep only the images with exactly one object of a preferred class.
    if len(objects) == 1:
        (label, bbox), = objects[0].items()
        annotation['class'] = label
        annotation['bbox'] = bbox
        annotation.pop('objects')
        annotations.append(annotation)

# Create the DataFrame of annotations. The columns are named explicitly so
# that 'filename' exists even when no annotation was selected.
pd_annot = pd.DataFrame(annotations, columns=['size', 'filename', 'class', 'bbox'])

# Copy the images of the selected annotations to a new folder.
src_dir = "C:\\Users\\Desktop\\pascal\\JPEGImages"
dst_dir = "C:\\Users\\Desktop\\pascal\\sampleJPEG"
# shutil.copy treats a non-existent destination as a file name, so every image
# would overwrite the same file unless the folder exists beforehand.
os.makedirs(dst_dir, exist_ok=True)
imageNames = pd_annot['filename']
for imageName in imageNames:
    shutil.copy(os.path.join(src_dir, imageName), dst_dir)