Warning: file_get_contents(/data/phpspider/zhask/data//catemap/0/xml/14.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何通过Python提取奇怪的XML表_Python_Xml_Parsing_Extraction_Xmltable - Fatal编程技术网

如何通过Python提取奇怪的XML表

如何通过Python提取奇怪的XML表,python,xml,parsing,extraction,xmltable,Python,Xml,Parsing,Extraction,Xmltable,xml文件中有以下代码,我想通过python提取xml文件的表: ".... <Test_input_data> <web_pins type="list" orientation="column"> <web_pin_id type="column_definition" data_type="int" index="1"/> <pin_center_node type="column_definition"

xml文件中有以下代码,我想通过python提取xml文件的表:

"....

<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>

...."
但是我不知道如何访问这些值,也许您可以帮助我?

我建议您使用 xml.etree.ElementTree。(当然,还存在其他解析器,例如 lxml)

使用它非常简单:用 fromstring 函数从 XML 创建一个 Element 实例,然后就可以开始了。

我建议您使用 xml.etree.ElementTree。(当然,还存在其他解析器,例如 lxml)

使用它非常简单:用 fromstring 函数从 XML 创建一个 Element 实例,然后就可以开始了。

您可以使用 BeautifulSoup,下面是一个示例:

from bs4 import BeautifulSoup as bs

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""
# Name the parser explicitly: without it bs4 warns and uses whichever
# parser happens to be installed, so results can differ between machines.
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!
# P.S. attribute access reaches the same tag:
soup.web_pins.text
from HTMLParser import HTMLParser

class MyHTMLParser(HTMLParser):
    """Capture the text found inside one named tag of a document.

    Call ``feed(markup, tag_name)`` and then read ``data``.

    Attributes:
        data: The most recent text chunk seen while inside the searched
            tag ("" until one is seen).
        search_tag: Lower-cased name of the tag being searched for, or
            None before the first ``feed`` call.
        grab_data: True while the parser is between the searched tag's
            start tag and its end tag.
    """

    data = ""
    search_tag = None
    grab_data = False

    def feed(self, data, tag_to_search_for):
        """Parse *data*, capturing text inside *tag_to_search_for*.

        Args:
            data: Markup to parse.
            tag_to_search_for: Name of the tag whose text is wanted;
                matched case-insensitively.
        """
        # HTMLParser hands tag names to the handlers lower-cased, so the
        # wanted name is normalised the same way; otherwise a mixed-case
        # name such as "Test_input_data" would never match.
        if tag_to_search_for is not None:
            tag_to_search_for = tag_to_search_for.lower()
        self.search_tag = tag_to_search_for
        # Explicit base-class call (not super()) keeps this working where
        # HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start capturing when the searched tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Remember *data* if it lies inside the searched tag.

        Each chunk replaces the previous one, so ``data`` ends up holding
        only the last text chunk seen inside the tag.
        """
        if self.grab_data:
            self.data = data

    def handle_endtag(self, tag):
        """Stop capturing when the searched tag closes."""
        if tag == self.search_tag:
            self.grab_data = False

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""

# Parse the sample and print whatever text was captured for <web_pins>.
parser = MyHTMLParser()
parser.feed(xml, "web_pins")
# Parenthesised so the line is valid both as a Python 2 print statement
# and as a Python 3 function call.
print(parser.data)  # Ta-daa!
从bs4导入美化组作为bs
xml=”“”
1  1012  5011
2  2012  5012
3  3012  5013
"""
soup=bs(xml)
tag=soup.find(“web_pins”)
text=tag.text#这是你的文本!
##另外,您还可以使用:
soup.web_pins.text

另外,你可能想看看标准库中的 HTMLParser。同样,这里有一个例子:

from bs4 import BeautifulSoup as bs

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""
# Name the parser explicitly: without it bs4 warns and uses whichever
# parser happens to be installed, so results can differ between machines.
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!
# P.S. attribute access reaches the same tag:
soup.web_pins.text
from HTMLParser import HTMLParser

class MyHTMLParser(HTMLParser):
    """Capture the text found inside one named tag of a document.

    Call ``feed(markup, tag_name)`` and then read ``data``.

    Attributes:
        data: The most recent text chunk seen while inside the searched
            tag ("" until one is seen).
        search_tag: Lower-cased name of the tag being searched for, or
            None before the first ``feed`` call.
        grab_data: True while the parser is between the searched tag's
            start tag and its end tag.
    """

    data = ""
    search_tag = None
    grab_data = False

    def feed(self, data, tag_to_search_for):
        """Parse *data*, capturing text inside *tag_to_search_for*.

        Args:
            data: Markup to parse.
            tag_to_search_for: Name of the tag whose text is wanted;
                matched case-insensitively.
        """
        # HTMLParser hands tag names to the handlers lower-cased, so the
        # wanted name is normalised the same way; otherwise a mixed-case
        # name such as "Test_input_data" would never match.
        if tag_to_search_for is not None:
            tag_to_search_for = tag_to_search_for.lower()
        self.search_tag = tag_to_search_for
        # Explicit base-class call (not super()) keeps this working where
        # HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start capturing when the searched tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Remember *data* if it lies inside the searched tag.

        Each chunk replaces the previous one, so ``data`` ends up holding
        only the last text chunk seen inside the tag.
        """
        if self.grab_data:
            self.data = data

    def handle_endtag(self, tag):
        """Stop capturing when the searched tag closes."""
        if tag == self.search_tag:
            self.grab_data = False

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""

# Parse the sample and print whatever text was captured for <web_pins>.
parser = MyHTMLParser()
parser.feed(xml, "web_pins")
# Parenthesised so the line is valid both as a Python 2 print statement
# and as a Python 3 function call.
print(parser.data)  # Ta-daa!
从HTMLParser导入HTMLParser
类MyHtmlPasser(HtmlPasser):
data=“”
搜索标签=无
抓取数据=错误
def提要(自我、数据、标记到搜索):
self.search\u tag=要搜索的标记
HTMLParser.feed(self,data)
def句柄\u开始标记(自身、标记、属性):
如果标记==self.search\u标记:
self.grab_data=1
def句柄_数据(自身、数据):
如果self.grab\u数据:
self.data=数据
def handle_endtag(self,tag):
如果标记==self.search\u标记:
self.grab_data=0
xml=”“”
1  1012  5011
2  2012  5012
3  3012  5013
"""
parser=MyHTMLParser()
feed(xml,“web_pins”)
打印parser.data#Ta daa!
您可以使用 BeautifulSoup,下面是一个示例:

from bs4 import BeautifulSoup as bs

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""
# Name the parser explicitly: without it bs4 warns and uses whichever
# parser happens to be installed, so results can differ between machines.
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!
# P.S. attribute access reaches the same tag:
soup.web_pins.text
from HTMLParser import HTMLParser

class MyHTMLParser(HTMLParser):
    """Capture the text found inside one named tag of a document.

    Call ``feed(markup, tag_name)`` and then read ``data``.

    Attributes:
        data: The most recent text chunk seen while inside the searched
            tag ("" until one is seen).
        search_tag: Lower-cased name of the tag being searched for, or
            None before the first ``feed`` call.
        grab_data: True while the parser is between the searched tag's
            start tag and its end tag.
    """

    data = ""
    search_tag = None
    grab_data = False

    def feed(self, data, tag_to_search_for):
        """Parse *data*, capturing text inside *tag_to_search_for*.

        Args:
            data: Markup to parse.
            tag_to_search_for: Name of the tag whose text is wanted;
                matched case-insensitively.
        """
        # HTMLParser hands tag names to the handlers lower-cased, so the
        # wanted name is normalised the same way; otherwise a mixed-case
        # name such as "Test_input_data" would never match.
        if tag_to_search_for is not None:
            tag_to_search_for = tag_to_search_for.lower()
        self.search_tag = tag_to_search_for
        # Explicit base-class call (not super()) keeps this working where
        # HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start capturing when the searched tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Remember *data* if it lies inside the searched tag.

        Each chunk replaces the previous one, so ``data`` ends up holding
        only the last text chunk seen inside the tag.
        """
        if self.grab_data:
            self.data = data

    def handle_endtag(self, tag):
        """Stop capturing when the searched tag closes."""
        if tag == self.search_tag:
            self.grab_data = False

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""

# Parse the sample and print whatever text was captured for <web_pins>.
parser = MyHTMLParser()
parser.feed(xml, "web_pins")
# Parenthesised so the line is valid both as a Python 2 print statement
# and as a Python 3 function call.
print(parser.data)  # Ta-daa!
从bs4导入美化组作为bs
xml=”“”
1  1012  5011
2  2012  5012
3  3012  5013
"""
soup=bs(xml)
tag=soup.find(“web_pins”)
text=tag.text#这是你的文本!
##另外,您还可以使用:
soup.web_pins.text

另外,你可能想看看标准库中的 HTMLParser。同样,这里有一个例子:

from bs4 import BeautifulSoup as bs

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""
# Name the parser explicitly: without it bs4 warns and uses whichever
# parser happens to be installed, so results can differ between machines.
soup = bs(xml, "html.parser")
tag = soup.find("web_pins")
text = tag.text  # Here you get your text!
# P.S. attribute access reaches the same tag:
soup.web_pins.text
from HTMLParser import HTMLParser

class MyHTMLParser(HTMLParser):
    """Capture the text found inside one named tag of a document.

    Call ``feed(markup, tag_name)`` and then read ``data``.

    Attributes:
        data: The most recent text chunk seen while inside the searched
            tag ("" until one is seen).
        search_tag: Lower-cased name of the tag being searched for, or
            None before the first ``feed`` call.
        grab_data: True while the parser is between the searched tag's
            start tag and its end tag.
    """

    data = ""
    search_tag = None
    grab_data = False

    def feed(self, data, tag_to_search_for):
        """Parse *data*, capturing text inside *tag_to_search_for*.

        Args:
            data: Markup to parse.
            tag_to_search_for: Name of the tag whose text is wanted;
                matched case-insensitively.
        """
        # HTMLParser hands tag names to the handlers lower-cased, so the
        # wanted name is normalised the same way; otherwise a mixed-case
        # name such as "Test_input_data" would never match.
        if tag_to_search_for is not None:
            tag_to_search_for = tag_to_search_for.lower()
        self.search_tag = tag_to_search_for
        # Explicit base-class call (not super()) keeps this working where
        # HTMLParser is an old-style class.
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Start capturing when the searched tag opens."""
        if tag == self.search_tag:
            self.grab_data = True

    def handle_data(self, data):
        """Remember *data* if it lies inside the searched tag.

        Each chunk replaces the previous one, so ``data`` ends up holding
        only the last text chunk seen inside the tag.
        """
        if self.grab_data:
            self.data = data

    def handle_endtag(self, tag):
        """Stop capturing when the searched tag closes."""
        if tag == self.search_tag:
            self.grab_data = False

# Sample document: the table rows are plain text inside <web_pins>.
xml = """<Test_input_data>
    <web_pins type="list" orientation="column">
        <web_pin_id type="column_definition" data_type="int" index="1"/>
        <pin_center_node type="column_definition" data_type="int" index="2"/>
        <journal_center_node type="column_definition" data_type="int" index="3"/>
         1  1012  5011
         2  2012  5012
         3  3012  5013
    </web_pins>
</Test_input_data>"""

# Parse the sample and print whatever text was captured for <web_pins>.
parser = MyHTMLParser()
parser.feed(xml, "web_pins")
# Parenthesised so the line is valid both as a Python 2 print statement
# and as a Python 3 function call.
print(parser.data)  # Ta-daa!
从HTMLParser导入HTMLParser
类MyHtmlPasser(HtmlPasser):
data=“”
搜索标签=无
抓取数据=错误
def提要(自我、数据、标记到搜索):
self.search\u tag=要搜索的标记
HTMLParser.feed(self,data)
def句柄\u开始标记(自身、标记、属性):
如果标记==self.search\u标记:
self.grab_data=1
def句柄_数据(自身、数据):
如果self.grab\u数据:
self.data=数据
def handle_endtag(self,tag):
如果标记==self.search\u标记:
self.grab_data=0
xml=”“”
1  1012  5011
2  2012  5012
3  3012  5013
"""
parser=MyHTMLParser()
feed(xml,“web_pins”)
打印parser.data#Ta daa!

使用Python XML解析器自助。使用Python XML解析器自助。