如何通过Python提取奇怪的XML表
xml文件中有以下代码,我想通过python提取xml文件的表:如何通过Python提取奇怪的XML表,python,xml,parsing,extraction,xmltable,Python,Xml,Parsing,Extraction,Xmltable,xml文件中有以下代码,我想通过python提取xml文件的表: ".... <Test_input_data> <web_pins type="list" orientation="column"> <web_pin_id type="column_definition" data_type="int" index="1"/> <pin_center_node type="column_definition"
"....
<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>
...."
但是我不知道如何访问这些值,也许您可以帮助我?我建议您使用 xml.etree.ElementTree。(当然,还存在其他解析器,例如 lxml。)
使用它非常简单:用 fromstring() 函数从 XML 创建一个 Element 实例,您就可以开始了。我建议您使用 xml.etree.ElementTree。(当然,还存在其他解析器,例如 lxml。)
使用它非常简单:用 fromstring() 函数从 XML 创建一个 Element 实例,您就可以开始了。您可以使用 BeautifulSoup,下面是一个示例:
from bs4 import BeautifulSoup as bs

# Sample document: <web_pins> holds three empty column-definition children
# followed by the whitespace-separated table rows as plain text.
xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so results can differ between machines (and it
# emits a warning about the guessed parser).
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!

# P.S. attribute-style access reaches the same tag; keep the result instead
# of evaluating the expression and throwing it away.
text_via_attribute = soup.web_pins.text
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2


class MyHTMLParser(HTMLParser):
    """Collect the text found inside one named tag.

    After ``feed(markup, tag_name)`` the ``data`` attribute holds every text
    chunk that appeared between the opening and closing ``tag_name`` tags,
    concatenated in document order.
    """

    data = ""           # text gathered so far
    search_tag = None   # lower-cased name of the tag whose text is wanted
    grab_data = False   # True while the parser is inside ``search_tag``

    def feed(self, data, tag_to_search_for):
        """Parse ``data`` and gather the text inside ``tag_to_search_for``."""
        # HTMLParser reports tag names in lower case, so normalise the
        # requested name or a mixed-case request would never match.
        self.search_tag = tag_to_search_for.lower()
        # Explicit base-class call (not super()) keeps this working on
        # Python 2, where HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start gathering when the wanted tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Append a text chunk while inside the wanted tag."""
        if self.grab_data:
            # Child tags split the text into several chunks; append rather
            # than overwrite so earlier chunks are not lost.
            self.data += data

    def handle_endtag(self, tag):
        """Stop gathering when the wanted tag closes."""
        if tag == self.search_tag:
            self.grab_data = False


xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

parser = MyHTMLParser()
parser.feed(xml, "web_pins")
print(parser.data)  # Ta-daa!
从bs4导入美化组作为bs
xml=”“”
1 1012 5011
2 2012 5012
3 3012 5013
"""
soup=bs(xml)
tag=soup.find(“web_pins”)
text=tag.text#这是你的文本!
##另外,您还可以使用:
soup.web_pins.text
另外,你也可以看看标准库中的 HTMLParser。同样,这里有一个例子:
from bs4 import BeautifulSoup as bs

# Sample document: <web_pins> holds three empty column-definition children
# followed by the whitespace-separated table rows as plain text.
xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so results can differ between machines (and it
# emits a warning about the guessed parser).
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!

# P.S. attribute-style access reaches the same tag; keep the result instead
# of evaluating the expression and throwing it away.
text_via_attribute = soup.web_pins.text
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2


class MyHTMLParser(HTMLParser):
    """Collect the text found inside one named tag.

    After ``feed(markup, tag_name)`` the ``data`` attribute holds every text
    chunk that appeared between the opening and closing ``tag_name`` tags,
    concatenated in document order.
    """

    data = ""           # text gathered so far
    search_tag = None   # lower-cased name of the tag whose text is wanted
    grab_data = False   # True while the parser is inside ``search_tag``

    def feed(self, data, tag_to_search_for):
        """Parse ``data`` and gather the text inside ``tag_to_search_for``."""
        # HTMLParser reports tag names in lower case, so normalise the
        # requested name or a mixed-case request would never match.
        self.search_tag = tag_to_search_for.lower()
        # Explicit base-class call (not super()) keeps this working on
        # Python 2, where HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start gathering when the wanted tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Append a text chunk while inside the wanted tag."""
        if self.grab_data:
            # Child tags split the text into several chunks; append rather
            # than overwrite so earlier chunks are not lost.
            self.data += data

    def handle_endtag(self, tag):
        """Stop gathering when the wanted tag closes."""
        if tag == self.search_tag:
            self.grab_data = False


xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

parser = MyHTMLParser()
parser.feed(xml, "web_pins")
print(parser.data)  # Ta-daa!
从HTMLParser导入HTMLParser
类MyHtmlPasser(HtmlPasser):
data=“”
搜索标签=无
抓取数据=错误
def提要(自我、数据、标记到搜索):
self.search\u tag=要搜索的标记
HTMLParser.feed(self,data)
def句柄\u开始标记(自身、标记、属性):
如果标记==self.search\u标记:
self.grab_data=1
def句柄_数据(自身、数据):
如果self.grab\u数据:
self.data=数据
def handle_endtag(self,tag):
如果标记==self.search\u标记:
self.grab_data=0
xml=”“”
1 1012 5011
2 2012 5012
3 3012 5013
"""
parser=MyHTMLParser()
feed(xml,“web_pins”)
打印parser.data#Ta daa!
您可以使用 BeautifulSoup,下面是一个示例:
from bs4 import BeautifulSoup as bs

# Sample document: <web_pins> holds three empty column-definition children
# followed by the whitespace-separated table rows as plain text.
xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so results can differ between machines (and it
# emits a warning about the guessed parser).
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!

# P.S. attribute-style access reaches the same tag; keep the result instead
# of evaluating the expression and throwing it away.
text_via_attribute = soup.web_pins.text
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2


class MyHTMLParser(HTMLParser):
    """Collect the text found inside one named tag.

    After ``feed(markup, tag_name)`` the ``data`` attribute holds every text
    chunk that appeared between the opening and closing ``tag_name`` tags,
    concatenated in document order.
    """

    data = ""           # text gathered so far
    search_tag = None   # lower-cased name of the tag whose text is wanted
    grab_data = False   # True while the parser is inside ``search_tag``

    def feed(self, data, tag_to_search_for):
        """Parse ``data`` and gather the text inside ``tag_to_search_for``."""
        # HTMLParser reports tag names in lower case, so normalise the
        # requested name or a mixed-case request would never match.
        self.search_tag = tag_to_search_for.lower()
        # Explicit base-class call (not super()) keeps this working on
        # Python 2, where HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start gathering when the wanted tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Append a text chunk while inside the wanted tag."""
        if self.grab_data:
            # Child tags split the text into several chunks; append rather
            # than overwrite so earlier chunks are not lost.
            self.data += data

    def handle_endtag(self, tag):
        """Stop gathering when the wanted tag closes."""
        if tag == self.search_tag:
            self.grab_data = False


xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

parser = MyHTMLParser()
parser.feed(xml, "web_pins")
print(parser.data)  # Ta-daa!
从bs4导入美化组作为bs
xml=”“”
1 1012 5011
2 2012 5012
3 3012 5013
"""
soup=bs(xml)
tag=soup.find(“web_pins”)
text=tag.text#这是你的文本!
##另外,您还可以使用:
soup.web_pins.text
另外,你也可以看看标准库中的 HTMLParser。同样,这里有一个例子:
from bs4 import BeautifulSoup as bs

# Sample document: <web_pins> holds three empty column-definition children
# followed by the whitespace-separated table rows as plain text.
xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so results can differ between machines (and it
# emits a warning about the guessed parser).
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!

# P.S. attribute-style access reaches the same tag; keep the result instead
# of evaluating the expression and throwing it away.
text_via_attribute = soup.web_pins.text
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2


class MyHTMLParser(HTMLParser):
    """Collect the text found inside one named tag.

    After ``feed(markup, tag_name)`` the ``data`` attribute holds every text
    chunk that appeared between the opening and closing ``tag_name`` tags,
    concatenated in document order.
    """

    data = ""           # text gathered so far
    search_tag = None   # lower-cased name of the tag whose text is wanted
    grab_data = False   # True while the parser is inside ``search_tag``

    def feed(self, data, tag_to_search_for):
        """Parse ``data`` and gather the text inside ``tag_to_search_for``."""
        # HTMLParser reports tag names in lower case, so normalise the
        # requested name or a mixed-case request would never match.
        self.search_tag = tag_to_search_for.lower()
        # Explicit base-class call (not super()) keeps this working on
        # Python 2, where HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start gathering when the wanted tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Append a text chunk while inside the wanted tag."""
        if self.grab_data:
            # Child tags split the text into several chunks; append rather
            # than overwrite so earlier chunks are not lost.
            self.data += data

    def handle_endtag(self, tag):
        """Stop gathering when the wanted tag closes."""
        if tag == self.search_tag:
            self.grab_data = False


xml = """<Test_input_data>
<web_pins type="list" orientation="column">
<web_pin_id type="column_definition" data_type="int" index="1"/>
<pin_center_node type="column_definition" data_type="int" index="2"/>
<journal_center_node type="column_definition" data_type="int" index="3"/>
1 1012 5011
2 2012 5012
3 3012 5013
</web_pins>
</Test_input_data>"""

parser = MyHTMLParser()
parser.feed(xml, "web_pins")
print(parser.data)  # Ta-daa!
从HTMLParser导入HTMLParser
类MyHtmlPasser(HtmlPasser):
data=“”
搜索标签=无
抓取数据=错误
def提要(自我、数据、标记到搜索):
self.search\u tag=要搜索的标记
HTMLParser.feed(self,data)
def句柄\u开始标记(自身、标记、属性):
如果标记==self.search\u标记:
self.grab_data=1
def句柄_数据(自身、数据):
如果self.grab\u数据:
self.data=数据
def handle_endtag(self,tag):
如果标记==self.search\u标记:
self.grab_data=0
xml=”“”
1 1012 5011
2 2012 5012
3 3012 5013
"""
parser=MyHTMLParser()
feed(xml,“web_pins”)
打印parser.data#Ta daa!
请使用 Python 的 XML 解析器来解决这个问题。