Python:使用 ElementTree 提取信息
标签:python, python-3.x, python-3.7, elementtree
数据是 XML 格式(原文中的链接已缺失)。我只想把纬度和经度、日期、小时、海拔、降雨量、气压、风速、风向提取出来并输出到控制台,使最终结果如下所示(前三项按普通文本输出,其余部分(即每小时预报)以表格形式输出)。到目前为止,我只写了下面这个脚本:
import xml.etree.ElementTree as ET
import requests
def get_forecast_data(api_url, timeout=10):
    """Fetch an XML document and print every direct child of its root.

    Args:
        api_url: URL of an endpoint that responds with an XML document.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        None. The tag and attribute dict of each top-level child element
        are written to standard output.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within *timeout*.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    call = requests.get(api_url, timeout=timeout)
    call.raise_for_status()
    # Parse the raw bytes so the XML parser honours the encoding declared in
    # the document itself instead of the charset requests guessed for .text.
    tree = ET.fromstring(call.content)
    for child in tree:
        print(child.tag, child.attrib)
if __name__ == "__main__":
    # Sample forecast endpoint that answers in XML (mode=xml).
    forecast_url = 'https://samples.openweathermap.org/data/2.5/forecast?q=London,us&mode=xml&appid=439d4b804bc8187953eb36d2a8c26a02'
    # get_forecast_data prints its findings itself and returns None, so
    # wrapping the call in print() would only emit a stray "None" line.
    get_forecast_data(forecast_url)
我卡在这里了,有人能帮我吗?

我认为,最好的方法是这样做:
import requests
import pandas as pd
from lxml import etree
# Download the hour-by-hour forecast, print the location header and show the
# per-hour sky condition and temperature as a pandas DataFrame.
url = "https://www.yr.no/place/United_States/Massachusetts/Boston/forecast_hour_by_hour.xml"
req = requests.get(url, timeout=10)  # a timeout keeps the script from hanging forever
req.raise_for_status()  # fail loudly instead of trying to parse an error page
doc = etree.XML(req.content)  # the payload is XML, so parse the raw bytes

# Location data via XPath; [0] raises IndexError if the node is missing.
loc = doc.xpath('//location/name/text()')[0]
lat = doc.xpath('//location/@latitude')[0]
lon = doc.xpath('//location/@longitude')[0]
alt = doc.xpath('//location/@altitude')[0]

rows = []  # one (from, to, sky, temperature) tuple per forecast period
for t in doc.xpath('//tabular/time'):
    # Look up symbol/temperature inside each <time> element rather than
    # zipping three independently collected lists: a single missing child
    # would otherwise shift every following row out of alignment.
    sky = t.find('.//symbol')
    tmpr = t.find('.//temperature')
    rows.append((
        t.get('from'),
        t.get('to'),
        sky.get('name') if sky is not None else None,
        tmpr.get('value') if tmpr is not None else None,
    ))

columns = ['From', 'To', 'Sky', 'Temperature']  # DataFrame column headers

print('Location: ', loc)
print('Longitude: ', lon)
print('Latitude: ', lat)
print('Altitude: ', alt)

# A bare DataFrame expression is only rendered in a notebook/REPL; print it
# explicitly so the table also appears when this runs as a script.
df = pd.DataFrame(rows, columns=columns)
print(df)
输出:
Location: Boston
Longitude: -71.05977
Latitude: 42.35843
Altitude: 14
From To Sky Temperature
0 2020-06-20T08:00:00 2020-06-20T09:00:00 Clear sky 24
1 2020-06-20T09:00:00 2020-06-20T10:00:00 Partly cloudy 27
2 2020-06-20T10:00:00 2020-06-20T11:00:00 Clear sky 29
等等
显然,您可以对其进行修改,以更改显示的值、格式等。

另一种做法如下:
import requests
import xml.etree.ElementTree as ET
def _attr(parent, path, name, default='n/a'):
    """Return attribute *name* of the first element matching *path*.

    Falls back to *default* when *parent* is None, when no element matches,
    or when the element has no such attribute.
    """
    node = parent.find(path) if parent is not None else None
    return default if node is None else node.get(name, default)


# Download the hour-by-hour forecast and print the location followed by one
# summary per forecast period.
r = requests.get('https://www.yr.no/place/United_States/Massachusetts/Boston/forecast_hour_by_hour.xml', timeout=10)
if r.status_code == 200:
    # Parse the raw bytes so the encoding declared in the XML is honoured.
    root = ET.fromstring(r.content)
    loc = root.find('./location')
    name = loc.find('./name') if loc is not None else None
    print('Location: {}'.format(name.text if name is not None else 'n/a'))
    # The coordinates are attributes of a <location> element nested inside
    # the outer <location>.
    print('Longitude: {}'.format(_attr(loc, './location', 'longitude')))
    print('Latitude: {}'.format(_attr(loc, './location', 'latitude')))
    print('Altitude: {}'.format(_attr(loc, './location', 'altitude')))
    print()
    for e in root.findall('.//time'):
        print('From: {} to: {}'.format(e.get('from', 'n/a'), e.get('to', 'n/a')))
        # find() returns None for a missing child; _attr turns that into
        # 'n/a' instead of letting the script die with AttributeError.
        print('Cloudiness: {}, precipitation: {} , Temperature: {}'.format(
            _attr(e, './symbol', 'name'),
            _attr(e, './precipitation', 'value'),
            _attr(e, './temperature', 'value')))
        print()
else:
    print('Failed to read xml. status code: {}'.format(r.status_code))
它是日期格式。如果我的回答解决了你的问题,请随意投票。
import requests
import xml.etree.ElementTree as ET
# Download the hour-by-hour forecast and print the location followed by one
# summary per forecast period. Missing elements or attributes are reported
# as 'n/a' rather than crashing the script.
r = requests.get('https://www.yr.no/place/United_States/Massachusetts/Boston/forecast_hour_by_hour.xml', timeout=10)
if r.status_code == 200:
    # Parse the raw bytes so the encoding declared in the XML is honoured.
    root = ET.fromstring(r.content)

    loc = root.find('./location')
    name = None if loc is None else loc.find('./name')
    print('Location: {}'.format('n/a' if name is None else name.text))

    # The coordinates are attributes of a <location> element nested inside
    # the outer <location>.
    inner_loc = None if loc is None else loc.find('./location')
    coords = {} if inner_loc is None else inner_loc.attrib
    print('Longitude: {}'.format(coords.get('longitude', 'n/a')))
    print('Latitude: {}'.format(coords.get('latitude', 'n/a')))
    print('Altitude: {}'.format(coords.get('altitude', 'n/a')))
    print()

    # (child tag, attribute) pairs in the order the summary line prints them.
    fields = (('symbol', 'name'), ('precipitation', 'value'), ('temperature', 'value'))
    for e in root.findall('.//time'):
        print('From: {} to: {}'.format(e.get('from', 'n/a'), e.get('to', 'n/a')))
        values = []
        for tag, attr in fields:
            node = e.find('./' + tag)
            # find() returns None when the child is absent.
            values.append('n/a' if node is None else node.get(attr, 'n/a'))
        print('Cloudiness: {}, precipitation: {} , Temperature: {}'.format(*values))
        print()
else:
    print('Failed to read xml. status code: {}'.format(r.status_code))