怎么把html文件转换成xml(把txt版本的标签转换成PASCAL VOC格式的XML标注文件)

  # Convert POD2017 plain-text box labels into PASCAL VOC style XML files.
"""Generate one PASCAL VOC style XML annotation per image in a name list.

For every entry in NAME_LIST the script reads the image from IMAGE_DIR to
obtain its size, reads the matching box file from PARSED_LABEL_DIR and
writes the resulting annotation to SAVE_DIR.  Each non-blank line of a box
file must hold ``<class> <xmin> <ymin> <xmax> <ymax>`` separated by
whitespace.

The code runs unchanged on Python 2 and Python 3.
"""
import os
import string
from xml.dom import minidom, Node

# The directory literals keep their original trailing backslashes; on Windows
# os.path.join() adds no extra separator after them, so the resulting paths
# are the same as plain string concatenation would give.
IMAGE_DIR = r'D:\workstation\POD page object detection 2017\Train\Train\Image\\'
NAME_LIST = r'D:\workstation\POD page object detection 2017\Train\Train\Image\nameLIST.txt'
PARSED_LABEL_DIR = r'D:\workstation\POD page object detection 2017\Train\Train\parsedLabel\\'
SAVE_DIR = r'D:\workstation\POD page object detection 2017\Train\Train\VOCVMLLabel\\'


def _add(doc, parent, tag, text=None):
    """Append a new <tag> element (with optional text) to parent; return it."""
    node = doc.createElement(tag)
    if text is not None:
        node.appendChild(doc.createTextNode(text))
    parent.appendChild(node)
    return node


def voc_file_name(entry):
    """Return the image file name recorded in the annotation for *entry*.

    The name is "00", followed by the four characters that precede the last
    four characters of the stripped entry, followed by ".jpg".
    NOTE(review): this presumably assumes entries end in a 4-digit index plus
    a 3-letter extension (e.g. "xxx0001.bmp") -- confirm against nameLIST.txt.
    """
    entry = entry.strip()
    return "00" + entry[-8:-4] + '.jpg'


def parse_boxes(lines):
    """Parse box lines into ``[class, xmin, ymin, xmax, ymax]`` string lists.

    Blank lines are skipped and any fields after the fifth are ignored.

    Raises:
        ValueError: if a non-blank line has fewer than five fields.
    """
    boxes = []
    for line in lines:
        # split() without arguments tolerates tabs and repeated spaces.
        fields = line.split()
        if not fields:
            continue
        if len(fields) < 5:
            raise ValueError("malformed box line: %r" % line)
        boxes.append(fields[:5])
    return boxes


def build_annotation(name, width, height, depth, boxes):
    """Build the annotation DOM for one image and return the Document.

    Args:
        name: image file name stored in <filename>.
        width, height, depth: integer image dimensions stored in <size>.
        boxes: iterable of ``[class, xmin, ymin, xmax, ymax]`` strings, one
            <object> element is emitted per box, in order.
    """
    doc = minidom.Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _add(doc, annotation, 'folder', "POD2017")
    _add(doc, annotation, 'filename', name)

    source = _add(doc, annotation, 'source')
    _add(doc, source, 'database', "The POD2017 Database")
    _add(doc, source, 'annotation', "ICDAR POD2017")
    _add(doc, source, 'image', "image")
    _add(doc, source, 'flickrid', "NULL")

    owner = _add(doc, annotation, 'owner')
    _add(doc, owner, 'flickrid', "NULL")
    _add(doc, owner, 'name', "cxm")

    size = _add(doc, annotation, 'size')
    _add(doc, size, 'width', "%d" % width)
    _add(doc, size, 'height', "%d" % height)
    _add(doc, size, 'depth', "%d" % depth)

    _add(doc, annotation, 'segmented', "0")

    for label, xmin, ymin, xmax, ymax in boxes:
        obj = _add(doc, annotation, 'object')
        _add(doc, obj, 'name', label)
        _add(doc, obj, 'pose', "undefined")
        _add(doc, obj, 'truncated', "0")
        _add(doc, obj, 'difficult', "0")
        bndbox = _add(doc, obj, 'bndbox')
        _add(doc, bndbox, 'xmin', xmin)
        _add(doc, bndbox, 'ymin', ymin)
        _add(doc, bndbox, 'xmax', xmax)
        _add(doc, bndbox, 'ymax', ymax)

    return doc


def read_image_shape(path):
    """Return ``(width, height, depth)`` of the image at *path*.

    Raises:
        IOError: if OpenCV cannot read the image.
    """
    # Imported here so the XML helpers above stay usable without OpenCV.
    import cv2

    image = cv2.imread(path, 1)
    # imread signals failure by returning None instead of raising.
    if image is None:
        raise IOError("cannot read image: %s" % path)
    height, width, depth = image.shape
    return width, height, depth


def write_annotation(doc, path):
    """Write *doc* to *path* as pretty-printed, UTF-8 encoded XML."""
    # toprettyxml(encoding=...) returns encoded bytes, hence binary mode.
    with open(path, 'wb') as out:
        out.write(doc.toprettyxml(encoding='utf-8'))


def convert_entry(entry, image_dir=IMAGE_DIR, label_dir=PARSED_LABEL_DIR,
                  save_dir=SAVE_DIR):
    """Convert one name-list entry and return the path of the written XML.

    The image is read from ``image_dir + entry``; the box file and the output
    file are both named after voc_file_name(entry) without its extension.
    """
    entry = entry.strip()
    name = voc_file_name(entry)
    stem = name[:-4]

    width, height, depth = read_image_shape(os.path.join(image_dir, entry))
    with open(os.path.join(label_dir, stem + '.txt'), 'r') as label_file:
        boxes = parse_boxes(label_file)

    out_path = os.path.join(save_dir, stem + '.XML')
    write_annotation(build_annotation(name, width, height, depth, boxes),
                     out_path)
    return out_path


def main():
    """Convert every entry of NAME_LIST, printing a running count."""
    with open(NAME_LIST, 'r') as name_list:
        entries = [line.strip() for line in name_list]

    count = 0
    for entry in entries:
        if not entry:
            # A blank line names no image; skip it instead of failing.
            continue
        convert_entry(entry)
        count += 1
        print(count)


if __name__ == "__main__":
    main()

源格式 与 生成的格式 对比如下:

[python] view plain copy

怎么把html文件转换成xml(txt版本的标签转换成PASCAL)(1)

,

免责声明:本文仅代表文章作者的个人观点,与本站无关。其原创性、真实性以及文中陈述文字和内容未经本站证实,对本文以及其中全部或者部分内容文字的真实性、完整性和原创性本站不作任何保证或承诺,请读者仅作参考,并自行核实相关内容。文章投诉邮箱:anhduc.ph@yahoo.com

    分享
    投诉
    首页