Python 如何在';对于';循环并使用它构建数据帧?
我正在写一个脚本来做一些探索性的分析。脚本引用ID的API,API用XML输出响应(不带子对象) 脚本:Python 如何在';对于';循环并使用它构建数据帧?,python,dataframe,xml-parsing,iterator,api-design,Python,Dataframe,Xml Parsing,Iterator,Api Design,我正在写一个脚本来做一些探索性的分析。脚本引用ID的API,API用XML输出响应(不带子对象) 脚本: import requests import xml.etree.ElementTree as et xml =''' <?xml version="1.0" encoding="UTF-8"?> <YM> <Version>xxx</Version> <ApiKey>xxx</ApiKey>
import requests
import xml.etree.ElementTree as et
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YM>
<Version>xxx</Version>
<ApiKey>xxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxxx</SaPasscode>
<Call Method = "GetIDs">
</Call>
</YM>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url', data=xml, headers=headers)
def xml_event_info(eventID):
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YourMembership>
<Version>xxx</Version>
<ApiKey>xxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxx</SaPasscode>
<Call Method = "Profile.Get">
<ID>{}</ID>
</Call>
</YourMembership>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url',
data=xml.format(eventID), headers=headers)
print(r.text)
# BUILD XML TREE OBJECT
dom = et.fromstring(r.text)
# PARSE EVENT ID TEXT AND PASS INTO FUNCTION
for i in dom.iterfind('.//ID'):
xml_event_info(i.text)
def xml2df():
tree = et.fromstring(xml_event_info(i.text))
root = tree.getroot()
all_records = []
headers = []
for i, child in enumerate(root):
record = []
for subchild in child:
record.append(subchild.text)
if subchild.tag not in headers:
headers.append(subchild.tag)
all_records.append(record)
return pd.DataFrame(all_records, columns=headers)
我试图将要映射到数据帧的xml映射到数据帧中,但不断得到错误'TypeError:Parse()参数1必须是字符串或只读缓冲区,而不是None'
如何将来自多个API调用的XML输出解析到数据框架中,其中每个XML标记都是数据框架的标头
Example:
---|ErrCode|ExtendedInfo|ID|FirstName----
我所指的完成工作的脚本和网站可以在这里找到()
脚本:
import requests
import xml.etree.ElementTree as et
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YM>
<Version>xxx</Version>
<ApiKey>xxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxxx</SaPasscode>
<Call Method = "GetIDs">
</Call>
</YM>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url', data=xml, headers=headers)
def xml_event_info(eventID):
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YourMembership>
<Version>xxx</Version>
<ApiKey>xxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxx</SaPasscode>
<Call Method = "Profile.Get">
<ID>{}</ID>
</Call>
</YourMembership>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url',
data=xml.format(eventID), headers=headers)
print(r.text)
# BUILD XML TREE OBJECT
dom = et.fromstring(r.text)
# PARSE EVENT ID TEXT AND PASS INTO FUNCTION
for i in dom.iterfind('.//ID'):
xml_event_info(i.text)
def xml2df():
tree = et.fromstring(xml_event_info(i.text))
root = tree.getroot()
all_records = []
headers = []
for i, child in enumerate(root):
record = []
for subchild in child:
record.append(subchild.text)
if subchild.tag not in headers:
headers.append(subchild.tag)
all_records.append(record)
return pd.DataFrame(all_records, columns=headers)
完整脚本:
import requests
import xml.etree.ElementTree as et
import pandas as pd
from lxml import etree
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YourMembership>
<Version>xxx</Version>
<ApiKey>xxxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxx</SaPasscode>
<Call Method = "Events.All.GetIDs">
<StartDate>2017/01/1</StartDate>
<EndDate>2017/01/31</EndDate>
</Call>
</YourMembership>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url', data=xml, headers=headers)
def xml_event_info(eventID):
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YourMembership>
<Version>xxx</Version>
<ApiKey>xxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxx</SaPasscode>
<Call Method = "Event.Get">
<EventID>{}</EventID>
</Call>
</YourMembership>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url',
data=xml.format(eventID), headers=headers)
print(r.text)
return r.text
# BUILD XML TREE OBJECT
dom = et.fromstring(r.text)
# PARSE EVENT ID TEXT AND PASS INTO FUNCTION
for i in dom.iterfind('.//EventID'):
y = xml_event_info(i.text)
for xml in y:
tree = et.fromstring(y)
root = tree.getchildren()
all_records = []
headers = []
for i , child in enumerate(root):
record = []
for subchild in child:
record.append(subchild.text)
if subchild.tag not in headers:
headers.append(subchild.tag)
all_records.append(record)
#print all_records
print pd.DataFrame(all_records, columns=headers)
导入请求
将xml.etree.ElementTree作为et导入
作为pd进口熊猫
从lxml导入etree
xml=“”
xxx
xxxx
xxx
xxx
2017/01/1
2017/01/31
'''
headers={'Content-Type':'application/x-www-form-urlencoded'}
r=requests.post('url',data=xml,headers=headers)
def xml_事件_信息(事件ID):
xml=“”
xxx
xxx
xxx
xxx
{}
'''
headers={'Content-Type':'application/x-www-form-urlencoded'}
r=请求。post('url',
data=xml.format(eventID),headers=headers)
打印(右文本)
返回r.text
#构建XML树对象
dom=et.fromstring(r.text)
#解析事件ID文本并传递到函数中
对于dom.iterfind('.//EventID')中的i:
y=xml\u事件\u信息(i.text)
对于y中的xml:
tree=et.fromstring(y)
root=tree.getchildren()
所有_记录=[]
标题=[]
对于i,枚举中的子级(根):
记录=[]
对于儿童中的子儿童:
record.append(subchild.text)
如果subchild.tag不在标题中:
headers.append(subchild.tag)
所有_记录。追加(记录)
#打印所有记录
打印pd.DataFrame(所有_记录,列=标题)
编辑:
TLDR:
如何将以下函数的输出映射到以xml元素作为数据框标题的数据框中:
import requests
import xml.etree.ElementTree as et
import pandas as pd
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YourMembership>
<Version>xxx</Version>
<ApiKey>xxxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxxx</SaPasscode>
<Call Method = "GetIDs">
</Call>
</YourMembership>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url', data=xml, headers=headers)
def xml_event_info(eventID):
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YourMembership>
<Version>xxx</Version>
<ApiKey>xxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxx</SaPasscode>
<Call Method = "Profile.Get">
<ID>{}</ID>
</Call>
</YourMembership>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url',
data=xml.format(eventID), headers=headers)
print(r.text)
导入请求
将xml.etree.ElementTree作为et导入
作为pd进口熊猫
xml=“”
xxx
xxxx
xxx
xxxx
'''
headers={'Content-Type':'application/x-www-form-urlencoded'}
r=requests.post('url',data=xml,headers=headers)
def xml_事件_信息(事件ID):
xml=“”
xxx
xxx
xxx
xxx
{}
'''
headers={'Content-Type':'application/x-www-form-urlencoded'}
r=请求。post('url',
data=xml.format(eventID),headers=headers)
打印(右文本)
输出:
<?xml version="1.0" encoding="utf-8" ?>
<Response>
<ErrCode>xxx</ErrCode>
<ExtendedErrorInfo>xxx</ExtendedErrorInfo>
<Profile.Get>
<ID>xxxx</ID>
<WebsiteID>xxxx</WebsiteID>
<EmailBounced>xxx</EmailBounced>
<NamePrefix>xxx</NamePrefix>
<FirstName>xxx</FirstName>
</Profile.Get>
</Response>
xxx
xxx
xxxx
xxxx
xxx
xxx
xxx
xml\u event\u info(eventID)
函数没有返回任何内容,只需在末尾添加一条return
语句,然后重试
def xml_event_info(eventID):
xml ='''
<?xml version="1.0" encoding="UTF-8"?>
<YourMembership>
<Version>xxx</Version>
<ApiKey>xxx</ApiKey>
<CallID>xxx</CallID>
<></>
<SaPasscode>xxx</SaPasscode>
<Call Method = "Profile.Get">
<ID>{}</ID>
</Call>
</YourMembership>
'''
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
r = requests.post('url',
data=xml.format(eventID), headers=headers)
print(r.text)
return r.text
defxml\u事件信息(eventID):
xml=“”
xxx
xxx
xxx
xxx
{}
'''
headers={'Content-Type':'application/x-www-form-urlencoded'}
r=请求。post('url',
data=xml.format(eventID),headers=headers)
打印(右文本)
返回r.text
IMO,你的问题很冗长。你能给我一个tldr;版本我很难理解您试图解决的问题。您在@EyuelDK added tldr中缺少了“m”。这是可行的,我现在可以处理函数的输出,但是如何从这里构建数据帧?我进行了编辑。我在返回时使用了您的建议,并创建了嵌套for循环,以迭代最后一个要放入Dataframe的xml输出。Dataframe已创建,但一个xml调用一直挂起,并且不会循环整个for循环。请参阅“完整脚本”部分下的编辑。谢谢你dom=et.fromstring(r.text)
这里是什么r
!是第一个请求的响应还是第二个请求的响应!当我打印“dom”时,我得到的响应是“”