电脑知识|软件|系统|数据库|服务器|编程开发|网络运营|知识问答|技术教程文章 - 好吧啦网

您的位置:首頁技術文章
文章詳情頁

python實現提取COCO,VOC數據集中特定的類

瀏覽:6日期:2022-08-02 18:36:22

1.python提取COCO數據集中特定的類

安裝pycocotools,github地址:https://github.com/philferriere/cocoapi

pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI

提取特定的類別如下:

"""Extract selected COCO classes and export them in Pascal VOC layout.

For every image that contains at least one object of a class listed in
``classes_names``, the image is copied to ``img_dir`` and a VOC-style XML
file holding the matching bounding boxes is written to ``anno_dir``.

WARNING: ``img_dir`` and ``anno_dir`` are deleted and recreated every time
this script runs (see ``mkr``).
"""
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw

# The path you want to save your results for coco to voc.
# Here the extracted classes are stored under the dataset root itself.
savepath = '/media/huanglong/Newsmy/COCO/'
img_dir = savepath + 'images/'
anno_dir = savepath + 'Annotations/'
# datasets_list=['train2014', 'val2014']
datasets_list = ['train2014']

# COCO has 80 classes; list the names of the classes to extract here
# ("person" is used as the example).
classes_names = ['person']
# Store annotations and train2014/val2014/... in this folder
# (root of the original COCO dataset).
dataDir = '/media/huanglong/Newsmy/COCO/'

# VOC XML templates. headstr takes (filename, width, height, depth) and
# objstr takes (name, xmin, ymin, xmax, ymax).
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

tailstr = '''\
</annotation>
'''


def mkr(path):
    """Create ``path`` as an empty directory.

    If the directory already exists it is removed with all of its contents
    first, so never point this at a directory holding data you want to keep.
    Missing parent directories are created as well.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


mkr(img_dir)
mkr(anno_dir)


def id2name(coco):
    """Return a ``{category_id: category_name}`` dict for a loaded COCO set."""
    return {cat['id']: cat['name'] for cat in coco.dataset['categories']}


def write_xml(anno_path, head, objs, tail):
    """Write one VOC annotation file.

    Args:
        anno_path: destination path of the XML file.
        head: already formatted header (``headstr`` filled in).
        objs: iterable of ``[name, xmin, ymin, xmax, ymax]`` entries.
        tail: closing text of the document (``tailstr``).
    """
    # The context manager guarantees the file is flushed and closed even if
    # formatting one of the objects fails.
    with open(anno_path, 'w') as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)


def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Copy one image to ``img_dir`` and write its XML to ``anno_dir``.

    Args:
        coco: loaded COCO object (unused, kept for interface compatibility).
        dataset: sub-folder of ``dataDir`` holding the image, e.g. "train2014".
        filename: image file name, e.g. "COCO_train2014_000000196610.jpg".
        objs: list of ``[name, xmin, ymin, xmax, ymax]`` boxes for the image.

    Raises:
        IOError: if the source image cannot be read by OpenCV.
    """
    # eg: COCO_train2014_000000196610.jpg --> COCO_train2014_000000196610.xml
    # splitext also handles extensions that are not three characters long.
    anno_path = anno_dir + os.path.splitext(filename)[0] + '.xml'
    img_path = dataDir + dataset + '/' + filename
    print(img_path)
    dst_imgpath = img_dir + filename

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise IOError('Could not read image: %s' % img_path)

    shutil.copy(img_path, dst_imgpath)

    height, width = img.shape[0], img.shape[1]
    depth = img.shape[2] if img.ndim == 3 else 1
    head = headstr % (filename, width, height, depth)
    write_xml(anno_path, head, objs, tailstr)


def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the wanted bounding boxes of one image, optionally showing them.

    Args:
        coco: loaded COCO object.
        dataset: sub-folder of ``dataDir`` holding the image.
        img: COCO image record (uses its "id" and "file_name" entries).
        classes: ``{category_id: name}`` mapping as built by ``id2name``.
        cls_id: category ids used to filter the annotations of the image.
        show: when true, draw the boxes on the image and display it.

    Returns:
        A list of ``[class_name, xmin, ymin, xmax, ymax]`` entries with
        integer pixel coordinates.
    """
    # Use the image id to look up its annotations.
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)

    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names:
            continue
        print(class_name)
        if 'bbox' not in ann:
            continue
        # COCO boxes are [x, y, width, height]; VOC wants corner coordinates.
        bbox = ann['bbox']
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])

    if show:
        # The image is only opened when it is actually displayed, which
        # avoids decoding every file during a plain export run.
        I = Image.open('%s/%s/%s' % (dataDir, dataset, img['file_name']))
        draw = ImageDraw.Draw(I)
        for _, xmin, ymin, xmax, ymax in objs:
            draw.rectangle([xmin, ymin, xmax, ymax])
        plt.figure()
        plt.axis('off')
        plt.imshow(I)
        plt.show()

    return objs


for dataset in datasets_list:
    # ./COCO/annotations/instances_train2014.json
    annFile = os.path.join(dataDir, 'annotations',
                           'instances_{}.json'.format(dataset))

    # COCO API for initializing annotated data
    coco = COCO(annFile)

    # show all classes in coco
    classes = id2name(coco)
    print(classes)
    # e.g. [1, 2, 3, 4, 6, 8]
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)

    # An image holding several of the requested classes is returned once per
    # class; remember what was exported so it is only processed once.
    exported_img_ids = set()
    for cls in classes_names:
        # Get ID number of this class
        cls_id = coco.getCatIds(catNms=[cls])
        img_ids = coco.getImgIds(catIds=cls_id)
        print(cls, len(img_ids))
        for imgId in tqdm(img_ids):
            if imgId in exported_img_ids:
                continue
            exported_img_ids.add(imgId)
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            objs = showimg(coco, dataset, img, classes, classes_ids,
                           show=False)
            print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)

2. 將上一步提取出的某一類VOC格式xml標註文件轉為COCO標準的json文件:

# -*- coding: utf-8 -*-
# @Time : 2019/8/27 10:48
# @Author :Rock
# @File : voc2coco.py
# just for object detection
"""Convert a directory of Pascal VOC XML annotations into one COCO JSON file.

The module keeps the document being built in the global ``coco`` dict;
``parseXmlFiles`` fills it and the ``__main__`` section dumps it to disk.
"""
import xml.etree.ElementTree as ET
import os
import json

# COCO document under construction.
coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []

# category name -> category id
category_set = dict()
# file names of the images that were already registered
image_set = set()

category_item_id = 0
image_id = 0
annotation_id = 0


def _to_int(text):
    """Parse an integer that may be written as a float, e.g. "123.0"."""
    return int(float(text))


def addCatItem(name):
    """Register a new category called ``name`` and return its id.

    Category ids start at 1 and grow by one per registered category.
    """
    global category_item_id
    category_item_id += 1
    category_item = {
        'supercategory': 'none',
        'id': category_item_id,
        'name': name,
    }
    coco['categories'].append(category_item)
    category_set[name] = category_item_id
    return category_item_id


def addImgItem(file_name, size):
    """Register an image and return the id stored in its record.

    Args:
        file_name: value of the <filename> tag.
        size: dict with at least the keys "width" and "height".

    Returns:
        The id written to the image record (ids start at 0). Annotations
        must use this value as their ``image_id``.

    Raises:
        Exception: if the file name, width or height is missing.
    """
    global image_id
    if file_name is None:
        raise Exception('Could not find filename tag in xml file.')
    if size['width'] is None:
        raise Exception('Could not find width tag in xml file.')
    if size['height'] is None:
        raise Exception('Could not find height tag in xml file.')

    # Hand out the current counter value and return that very same value;
    # returning the already incremented counter would make every annotation
    # reference the wrong image.
    new_image_id = image_id
    image_id += 1
    image_item = {
        'id': new_image_id,
        'file_name': file_name,
        'width': size['width'],
        'height': size['height'],
    }
    coco['images'].append(image_item)
    image_set.add(file_name)
    return new_image_id


def addAnnoItem(object_name, image_id, category_id, bbox):
    """Append one annotation to the COCO document.

    Args:
        object_name: class name of the object (unused, kept for callers).
        image_id: id of the image the box belongs to.
        category_id: id of the object's category.
        bbox: ``[x, y, w, h]`` box in pixels.
    """
    global annotation_id
    x, y, w, h = bbox
    # The segmentation is the box outline: left-top, left-bottom,
    # right-bottom, right-top.
    seg = [x, y, x, y + h, x + w, y + h, x + w, y]
    annotation_id += 1
    annotation_item = {
        'segmentation': [seg],
        'area': w * h,
        'iscrowd': 0,
        'ignore': 0,
        'image_id': image_id,
        'bbox': bbox,
        'category_id': category_id,
        'id': annotation_id,
    }
    coco['annotations'].append(annotation_item)


def parseXmlFiles(xml_path):
    """Parse every ``*.xml`` file in ``xml_path`` into the global ``coco``.

    Files are visited in sorted order so that the generated ids are
    reproducible between runs. An image is registered when the first
    element following its <size> tag is reached, so an annotation file
    whose <size> is the last element registers nothing.

    Raises:
        Exception: on a wrong root element, a duplicated image file name,
            or an <object>/<size>/<bndbox> structure that does not follow
            the expected VOC layout.
    """
    for f in sorted(os.listdir(xml_path)):
        if not f.endswith('.xml'):
            continue

        bndbox = dict()
        size = {'width': None, 'height': None, 'depth': None}
        current_image_id = None
        current_category_id = None
        file_name = None

        xml_file = os.path.join(xml_path, f)
        tree = ET.parse(xml_file)
        root = tree.getroot()
        if root.tag != 'annotation':
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))

        # elem is <folder>, <filename>, <size>, <object>
        for elem in root:
            current_parent = elem.tag
            current_sub = None
            object_name = None

            if elem.tag == 'folder':
                continue

            if elem.tag == 'filename':
                file_name = elem.text
                # Duplicates must be looked up among the registered images,
                # not among the category names.
                if file_name in image_set:
                    raise Exception('file_name duplicated')

            # add img item only after parse <size> tag
            elif current_image_id is None and file_name is not None and size['width'] is not None:
                if file_name not in image_set:
                    current_image_id = addImgItem(file_name, size)
                else:
                    raise Exception('duplicated image: {}'.format(file_name))

            # subelem is <width>, <height>, <depth>, <name>, <bndbox>
            for subelem in elem:
                bndbox['xmin'] = None
                bndbox['xmax'] = None
                bndbox['ymin'] = None
                bndbox['ymax'] = None

                current_sub = subelem.tag
                if current_parent == 'object' and subelem.tag == 'name':
                    object_name = subelem.text
                    if object_name not in category_set:
                        current_category_id = addCatItem(object_name)
                    else:
                        current_category_id = category_set[object_name]

                elif current_parent == 'size':
                    # .get() tolerates tags other than width/height/depth.
                    if size.get(subelem.tag) is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[subelem.tag] = _to_int(subelem.text)

                # option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
                for option in subelem:
                    if current_sub == 'bndbox':
                        if bndbox.get(option.tag) is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = _to_int(option.text)

                # only after parse the <object> tag
                if bndbox['xmin'] is not None:
                    if object_name is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_image_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_category_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    # COCO boxes are [x, y, w, h].
                    bbox = [
                        bndbox['xmin'],
                        bndbox['ymin'],
                        bndbox['xmax'] - bndbox['xmin'],
                        bndbox['ymax'] - bndbox['ymin'],
                    ]
                    addAnnoItem(object_name, current_image_id, current_category_id, bbox)


if __name__ == '__main__':
    # Change the two paths below: the directory holding the xml files and
    # the absolute path of the json file to generate.
    # NOTE(review): the path separators of these example values appear to
    # have been lost; replace them with your own locations.
    xml_path = r'G:datasetCOCOpersoncoco_val2014annotations'
    json_file = r'G:datasetCOCOpersoncoco_val2014instances_val2014.json'
    parseXmlFiles(xml_path)
    with open(json_file, 'w') as json_fp:
        json.dump(coco, json_fp)

3.python提取Pascal Voc數據集中特定的類

# -*- coding: utf-8 -*-
# @Function: There are 20 classes in VOC data set. If you need to extract
# specific classes, you can use this program to extract them.
"""Copy the Pascal VOC annotations and images of selected classes.

Each annotation file is reduced to the <object> blocks whose class is listed
in ``target_classes``. Files that still contain at least one object are
written to ``ann_savepath`` and their image is copied to ``img_savepath``.

The annotation files are processed as text lines and must use the original
VOC layout: top-level ``<object>`` / ``</object>`` lines indented with one
tab and the object's ``<name>`` line indented with two tabs.
"""
import os
import shutil

ann_filepath = 'E:/VOCdevkit/VOC2012/Annotations/'
img_filepath = 'E:/VOCdevkit/VOC2012/JPEGImages/'
# NOTE(review): the separator after the drive letter was missing in these two
# paths ("E:TrafficDatasets/..."), which looks like a lost backslash; confirm
# that the drive root is the intended location.
img_savepath = 'E:/TrafficDatasets/JPEGImages/'
ann_savepath = 'E:/TrafficDatasets/Annotations/'
os.makedirs(img_savepath, exist_ok=True)
os.makedirs(ann_savepath, exist_ok=True)

# All 20 VOC classes, kept as a reference for valid names.
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor', 'person']

# The classes to extract.
target_classes = ['bicycle', 'bus', 'car', 'motorbike', 'train']

unknown_classes = [name for name in target_classes if name not in classes]
if unknown_classes:
    raise ValueError('Not a VOC class: %s' % ', '.join(unknown_classes))

# Exact text lines that delimit an object block and name a wanted class.
object_open_line = '\t<object>\n'
object_close_line = '\t</object>\n'
target_name_lines = {
    '\t\t<name>%s</name>\n' % name for name in target_classes
}

for file in sorted(os.listdir(ann_filepath)):
    if not file.endswith('.xml'):
        continue
    print(file)

    # Open the annotation file.
    with open(os.path.join(ann_filepath, file)) as fp:
        lines = fp.readlines()

    # Locate the <object> blocks in the xml.
    ind_start = [i for i, line in enumerate(lines)
                 if line == object_open_line]
    ind_end = [i for i, line in enumerate(lines)
               if line == object_close_line]
    if not ind_start or len(ind_start) != len(ind_end):
        # No objects at all, or unbalanced tags: nothing can be extracted.
        continue

    # Keep the object blocks that belong to one of the wanted classes.
    kept_lines = []
    matched = 0
    for start, end in zip(ind_start, ind_end):
        block = lines[start:end + 1]
        if target_name_lines.intersection(block):
            matched += 1
            kept_lines.extend(block)

    # If the file holds none of the wanted classes, write no xml and copy no
    # image.
    if matched == 0:
        continue

    # xml head: everything in front of the first object.
    string_start = lines[0:ind_start[0]]
    # xml tail: everything behind the last object. Depending on the file the
    # tail is just </annotation> or also holds the <size>/<source> sections,
    # so it is taken from the content instead of from the file name.
    string_end = lines[ind_end[-1] + 1:]

    with open(ann_savepath + file, 'w') as fp_w:
        fp_w.writelines(string_start + kept_lines + string_end)

    name_img = img_filepath + os.path.splitext(file)[0] + ".jpg"
    shutil.copy(name_img, img_savepath)

以上這篇python實現提取COCO,VOC數據集中特定的類就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支持好吧啦網。

標簽: Python 編程
主站蜘蛛池模板: 碳刷_刷握_集电环_恒压簧_电刷厂家-上海丹臻机电科技有限公司 | 顶空进样器-吹扫捕集仪-热脱附仪-二次热解吸仪-北京华盛谱信仪器 | 杭州高温泵_热水泵_高温油泵|昆山奥兰克泵业制造有限公司 | 一体化污水处理设备-一体化净水设备-「山东梦之洁水处理」 | 四探针电阻率测试仪-振实密度仪-粉末流动性测定仪-宁波瑞柯微智能 | 自动化生产线-自动化装配线-直流电机自动化生产线-东莞市慧百自动化有限公司 | 湖南印刷厂|长沙印刷公司|画册印刷|挂历印刷|台历印刷|杂志印刷-乐成印刷 | 上海公司注册-代理记账-招投标审计-上海昆仑扇财税咨询有限公司 上海冠顶工业设备有限公司-隧道炉,烘箱,UV固化机,涂装设备,高温炉,工业机器人生产厂家 | 上海小程序开发-小程序制作-上海小程序定制开发公司-微信商城小程序-上海咏熠 | 铣床|万能铣床|立式铣床|数控铣床|山东滕州万友机床有限公司 | 披萨石_披萨盘_电器家电隔热绵加工定制_佛山市南海区西樵南方综合保温材料厂 | 三防漆–水性三防漆–水性浸渍漆–贝塔三防漆厂家 | 东莞海恒试验仪器设备有限公司 | 拉力测试机|材料拉伸试验机|电子拉力机价格|万能试验机厂家|苏州皖仪实验仪器有限公司 | 菏泽商标注册_菏泽版权登记_商标申请代理_菏泽商标注册去哪里 | 污水处理设备维修_污水处理工程改造_机械格栅_过滤设备_气浮设备_刮吸泥机_污泥浓缩罐_污水处理设备_污水处理工程-北京龙泉新禹科技有限公司 | 山东风淋室_201/304不锈钢风淋室净化设备厂家-盛之源风淋室厂家 翻斗式矿车|固定式矿车|曲轨侧卸式矿车|梭式矿车|矿车配件-山东卓力矿车生产厂家 | 安徽泰科检测科技有限公司【官方网站】| 阴离子聚丙烯酰胺价格_PAM_高分子聚丙烯酰胺厂家-河南泰航净水材料有限公司 | 艺术涂料_进口艺术涂料_艺术涂料加盟_艺术涂料十大品牌 -英国蒙太奇艺术涂料 | 聚合氯化铝价格_聚合氯化铝厂家_pac絮凝剂-唐达净水官网 | 警方提醒:赣州约炮论坛真的安全吗?2025年新手必看的网络交友防坑指南 | 空气能暖气片,暖气片厂家,山东暖气片,临沂暖气片-临沂永超暖通设备有限公司 | 高压负荷开关-苏州雷尔沃电器有限公司 | 好看的韩国漫画_韩漫在线免费阅读-汗汗漫画| 植筋胶-粘钢胶-碳纤维布-碳纤维板-环氧砂浆-加固材料生产厂家-上海巧力建筑科技有限公司 | 北京亦庄厂房出租_经开区产业园招商信息平台 | 泥沙分离_泥沙分离设备_泥砂分离机_洛阳隆中重工机械有限公司 | 塑料薄膜_PP薄膜_聚乙烯薄膜-常州市鑫美新材料包装厂 | 广州/东莞小字符喷码机-热转印打码机-喷码机厂家-广州瑞润科技 | 郑州宣传片拍摄-TVC广告片拍摄-微电影短视频制作-河南优柿文化传媒有限公司 | 广州昊至泉水上乐园设备有限公司| 上海单片机培训|重庆曙海培训分支机构—CortexM3+uC/OS培训班,北京linux培训,Windows驱动开发培训|上海IC版图设计,西安linux培训,北京汽车电子EMC培训,ARM培训,MTK培训,Android培训 | 磁棒电感生产厂家-电感器厂家-电感定制-贴片功率电感供应商-棒形电感生产厂家-苏州谷景电子有限公司 | 等离子表面处理机-等离子表面活化机-真空等离子清洗机-深圳市东信高科自动化设备有限公司 | 合肥花魁情感婚姻咨询中心_挽回爱情_修复婚姻_恋爱指南 | 油液红外光谱仪-油液监测系统-燃油嗅探仪-上海冉超光电科技有限公司 | 琉璃瓦-琉璃瓦厂家-安徽盛阳新型建材科技有限公司 | 原子吸收设备-国产分光光度计-光谱分光光度计-上海光谱仪器有限公司 | 长江船运_国内海运_内贸船运_大件海运|运输_船舶运输价格_钢材船运_内河运输_风电甲板船_游艇运输_航运货代电话_上海交航船运 | 时代北利离心机,实验室离心机,医用离心机,低速离心机DT5-2,美国SKC采样泵-上海京工实业有限公司 工业电炉,台车式电炉_厂家-淄博申华工业电炉有限公司 |