怎么把html文件转换成xml(txt版本的标签转换成PASCAL VOC格式的XML)
# Convert the plain-text box labels of the POD 2017 training set into
# PASCAL VOC style XML annotation files, one XML file per image.
#
# NOTE(review): this listing was recovered from a paste that had lost its
# indentation and every '+' character; the loop nesting and the string
# concatenations below were reconstructed from the logic - confirm against
# the original script if it is available.
from xml.dom import minidom, Node
import cv2
import string

# Directory prefixes are concatenated directly with file names below, so
# they keep their trailing path separators.
res = r'D:\workstation\POD page object detection 2017\Train\Train\Image\\'
name_list_path = r'D:\workstation\POD page object detection 2017\Train\Train\Image\nameLIST.txt'
parsedLabel = r'D:\workstation\POD page object detection 2017\Train\Train\parsedLabel\\'
savePath = r'D:\workstation\POD page object detection 2017\Train\Train\VOCVMLLabel\\'


def _append_text_element(doc, parent, tag, text):
    """Append <tag>text</tag> to parent and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


# Read the whole image list up front so the handle is closed before the
# (long) conversion loop starts.
with open(name_list_path, 'r') as f:
    lines = f.readlines()

count = 0
for line in lines:
    image_file = line.strip()
    if not image_file:
        # Tolerate blank lines (e.g. a trailing newline) in the list file.
        continue

    # The annotation name is "00" plus the four characters that precede
    # the 4-character extension of the listed file name.
    name = "00" + image_file[-8:-4] + '.jpg'

    # Flag 1 asks OpenCV for a colour image, so shape is (h, w, channels).
    im = cv2.imread(res + image_file, 1)
    if im is None:
        # imread signals failure by returning None instead of raising.
        raise IOError('cannot read image: ' + res + image_file)
    h, w, d = im.shape

    doc = minidom.Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', "POD2017")
    _append_text_element(doc, annotation, 'filename', name)

    source = doc.createElement('source')
    _append_text_element(doc, source, 'database', "The POD2017 Database")
    _append_text_element(doc, source, 'annotation', "ICDAR POD2017")
    _append_text_element(doc, source, 'image', "image")
    _append_text_element(doc, source, 'flickrid', "NULL")
    annotation.appendChild(source)

    owner = doc.createElement('owner')
    _append_text_element(doc, owner, 'flickrid', "NULL")
    _append_text_element(doc, owner, 'name', "cxm")
    annotation.appendChild(owner)

    size = doc.createElement('size')
    _append_text_element(doc, size, 'width', "%d" % w)
    _append_text_element(doc, size, 'height', "%d" % h)
    _append_text_element(doc, size, 'depth', "%d" % d)
    annotation.appendChild(size)

    _append_text_element(doc, annotation, 'segmented', "0")

    # Each label line is "<class> <xmin> <ymin> <xmax> <ymax>",
    # separated by single spaces.
    with open(parsedLabel + name[:-4] + '.txt', 'r') as txtLabel:
        boxes = txtLabel.readlines()

    for box in boxes:
        if not box.strip():
            # A blank line carries no box; skip it instead of failing on
            # the missing fields.
            continue
        fields = box.strip().split(' ')

        obj = doc.createElement('object')
        _append_text_element(doc, obj, 'name', fields[0])
        _append_text_element(doc, obj, 'pose', "undefined")
        _append_text_element(doc, obj, 'truncated', "0")
        _append_text_element(doc, obj, 'difficult', "0")

        bndbox = doc.createElement('bndbox')
        _append_text_element(doc, bndbox, 'xmin', fields[1])
        _append_text_element(doc, bndbox, 'ymin', fields[2])
        _append_text_element(doc, bndbox, 'xmax', fields[3])
        _append_text_element(doc, bndbox, 'ymax', fields[4])
        obj.appendChild(bndbox)

        annotation.appendChild(obj)

    # toprettyxml(encoding=...) yields encoded bytes, so the file is
    # opened in binary mode.
    with open(savePath + name[:-4] + '.XML', 'wb') as savefile:
        savefile.write(doc.toprettyxml(encoding='utf-8'))

    # Running total of converted images, printed as a progress indicator.
    count += 1
    print(count)
源格式 与 生成的格式 对比如下:
[python] view plain copy
,
免责声明:本文仅代表文章作者的个人观点,与本站无关。其原创性、真实性以及文中陈述文字和内容未经本站证实,对本文以及其中全部或者部分内容文字的真实性、完整性和原创性本站不作任何保证或承诺,请读者仅作参考,并自行核实相关内容。文章投诉邮箱:anhduc.ph@yahoo.com