Python:使用 pandas 从 Weather Underground(Wunderground)获取天气数据

Python:使用 pandas 从 Weather Underground(Wunderground)获取天气数据,python,pandas,import,weather-api,Python,Pandas,Import,Weather Api,我在 Shane Lynn 的网站上找到了一套非常有用的脚本。第一个脚本用于从 Weather Underground 中提取数据,如下所示: import requests import pandas as pd from dateutil import parser, rrule from datetime import datetime, time, date import time def getRainfallData(station, day, month, year): """

我在 Shane Lynn 的网站上找到了一套非常有用的脚本。第一个脚本用于从 Weather Underground 中提取数据,如下所示:

import requests
import pandas as pd
from dateutil import parser, rrule
from datetime import datetime, time, date
import time

def getRainfallData(station, day, month, year):
    """
    Function to return a data frame of minute-level weather data for a single Wunderground PWS station.

    Args:
        station (string): Station code from the Wunderground website
        day (int): Day of month for which data is requested
        month (int): Month for which data is requested
        year (int): Year for which data is requested

    Returns:
        Pandas Dataframe with weather data for specified station and date,
        or None if the response text could not be parsed as CSV.
    """
    # Imported locally because this snippet's import block does not bring in
    # ``io``, which ``io.StringIO`` below requires.
    import io

    # NOTE(review): this template has no scheme or host, so the request cannot
    # succeed as written; the base address looks like it was stripped from the
    # snippet -- confirm against the original script before use.
    url = "{station}&day={day}&month={month}&year={year}&graphspan=day&format=1"
    full_url = url.format(station=station, day=day, month=month, year=year)
    # Request data from wunderground data. A browser-like User-agent header is
    # sent with the request.
    response = requests.get(full_url, headers={'User-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
    data = response.text
    # remove the excess <br> from the text data
    data = data.replace('<br>', '')
    # Convert to pandas dataframe (fails if issues with weather station)
    try:
        dataframe = pd.read_csv(io.StringIO(data), index_col=False)
        dataframe['station'] = station
    except Exception:
        # Deliberate best-effort: a day that cannot be parsed is reported and
        # skipped instead of aborting the whole download run.
        print("Issue with date: {}-{}-{} for station {}".format(day,month,year, station))
        return None
    return dataframe

# Generate a list of all of the dates we want data for
start_date = "2016-08-01"
end_date = "2016-08-31"
start = parser.parse(start_date)
end = parser.parse(end_date)
dates = list(rrule.rrule(rrule.DAILY, dtstart=start, until=end))

# Create a list of stations here to download data for
stations = ["ILONDON28"]
# Set a backoff time in seconds if a request fails
backoff_time = 10
data = {}

# Gather data for each station in turn and save to CSV.
for station in stations:
    print("Working on {}".format(station))
    data[station] = []
    # The loop variable is not called ``date`` so that it does not shadow the
    # ``date`` class imported from ``datetime``.
    for current_date in dates:
        # Print period status update messages
        if current_date.day % 10 == 0:
            print("Working on date: {} for station {}".format(current_date, station))
        done = False
        while not done:
            try:
                weather_data = getRainfallData(
                    station, current_date.day, current_date.month, current_date.year
                )
                done = True
            except requests.exceptions.ConnectionError:
                # May get rate limited by the server, backoff if so.
                # requests raises its own ConnectionError, which is not a
                # subclass of the builtin of the same name.
                print("Got connection error on {}".format(current_date))
                print("Will retry in {} seconds".format(backoff_time))
                time.sleep(backoff_time)
        # Add each processed date to the overall data
        # (getRainfallData returns None for days it could not parse).
        if weather_data is not None:
            data[station].append(weather_data)
    # Finally combine all of the individual days and output to CSV for analysis.
    # NOTE(review): the output file name is a placeholder; the original write
    # call is missing from this snippet -- adjust the path as needed.
    if data[station]:
        pd.concat(data[station]).to_csv("{}_weather.csv".format(station))


print e
import io

import io
import requests
import pandas as pd
from dateutil import parser, rrule
from datetime import datetime, time, date
import time

def getRainfallData(station, day, month, year):
    """
    Function to return a data frame of minute-level weather data for a single Wunderground PWS station.

    Args:
        station (string): Station code from the Wunderground website
        day (int): Day of month for which data is requested
        month (int): Month for which data is requested
        year (int): Year for which data is requested

    Returns:
        Pandas Dataframe with weather data for specified station and date,
        or None if the response text could not be parsed as CSV.
    """
    # NOTE(review): this template has no scheme or host, so the request cannot
    # succeed as written; the base address looks like it was stripped from the
    # snippet -- confirm against the original script before use.
    url = "{station}&day={day}&month={month}&year={year}&graphspan=day&format=1"
    full_url = url.format(station=station, day=day, month=month, year=year)

    # Request data from wunderground data
    response = requests.get(full_url)
    data = response.text
    # remove the excess <br> from the text data
    data = data.replace('<br>', '')

    # Convert to pandas dataframe (fails if issues with weather station)
    try:
        dataframe = pd.read_csv(io.StringIO(data), index_col=False)
        dataframe['station'] = station
    except Exception:
        # Deliberate best-effort: a day that cannot be parsed is reported and
        # skipped instead of aborting the whole download run.
        print("Issue with date: {}-{}-{} for station {}".format(day,month,year, station))
        return None

    return dataframe

# Generate a list of all of the dates we want data for
start_date = "2016-08-01"
end_date = "2016-08-31"
start = parser.parse(start_date)
end = parser.parse(end_date)
dates = list(rrule.rrule(rrule.DAILY, dtstart=start, until=end))

# Create a list of stations here to download data for
stations = ["ILONDONL28"]
# Set a backoff time in seconds if a request fails
backoff_time = 10
data = {}

# Gather data for each station in turn and save to CSV.
for station in stations:
    print("Working on {}".format(station))
    data[station] = []
    # The loop variable is not called ``date`` so that it does not shadow the
    # ``date`` class imported from ``datetime``.
    for current_date in dates:
        # Print period status update messages
        if current_date.day % 10 == 0:
            print("Working on date: {} for station {}".format(current_date, station))
        done = False
        while not done:
            try:
                weather_data = getRainfallData(
                    station, current_date.day, current_date.month, current_date.year
                )
                done = True
            except requests.exceptions.ConnectionError:
                # May get rate limited by the server, backoff if so.
                # requests raises its own ConnectionError, which is not a
                # subclass of the builtin of the same name.
                print("Got connection error on {}".format(current_date))
                print("Will retry in {} seconds".format(backoff_time))
                time.sleep(backoff_time)
        # Add each processed date to the overall data
        # (getRainfallData returns None for days it could not parse).
        if weather_data is not None:
            data[station].append(weather_data)
    # Finally combine all of the individual days and output to CSV for analysis.
    # NOTE(review): the output file name is a placeholder; the original write
    # call is missing from this snippet -- adjust the path as needed.
    if data[station]:
        pd.concat(data[station]).to_csv("{}_weather.csv".format(station))


import io
import io
import requests
import pandas as pd
from dateutil import parser, rrule
from datetime import datetime, time, date
import time

def getRainfallData(station, day, month, year):
    """
    Function to return a data frame of minute-level weather data for a single Wunderground PWS station.

    Args:
        station (string): Station code from the Wunderground website
        day (int): Day of month for which data is requested
        month (int): Month for which data is requested
        year (int): Year for which data is requested

    Returns:
        Pandas Dataframe with weather data for specified station and date,
        or None if the response text could not be parsed as CSV.
    """
    # NOTE(review): this template has no scheme or host, so the request cannot
    # succeed as written; the base address looks like it was stripped from the
    # snippet -- confirm against the original script before use.
    url = "{station}&day={day}&month={month}&year={year}&graphspan=day&format=1"
    full_url = url.format(station=station, day=day, month=month, year=year)

    # Request data from wunderground data
    response = requests.get(full_url)
    data = response.text
    # remove the excess <br> from the text data
    data = data.replace('<br>', '')

    # Convert to pandas dataframe (fails if issues with weather station)
    try:
        dataframe = pd.read_csv(io.StringIO(data), index_col=False)
        dataframe['station'] = station
    except Exception:
        # Deliberate best-effort: a day that cannot be parsed is reported and
        # skipped instead of aborting the whole download run.
        print("Issue with date: {}-{}-{} for station {}".format(day,month,year, station))
        return None

    return dataframe

# Generate a list of all of the dates we want data for
start_date = "2016-08-01"
end_date = "2016-08-31"
start = parser.parse(start_date)
end = parser.parse(end_date)
dates = list(rrule.rrule(rrule.DAILY, dtstart=start, until=end))

# Create a list of stations here to download data for
stations = ["ILONDONL28"]
# Set a backoff time in seconds if a request fails
backoff_time = 10
data = {}

# Gather data for each station in turn and save to CSV.
for station in stations:
    print("Working on {}".format(station))
    data[station] = []
    # The loop variable is not called ``date`` so that it does not shadow the
    # ``date`` class imported from ``datetime``.
    for current_date in dates:
        # Print period status update messages
        if current_date.day % 10 == 0:
            print("Working on date: {} for station {}".format(current_date, station))
        done = False
        while not done:
            try:
                weather_data = getRainfallData(
                    station, current_date.day, current_date.month, current_date.year
                )
                done = True
            except requests.exceptions.ConnectionError:
                # May get rate limited by the server, backoff if so.
                # requests raises its own ConnectionError, which is not a
                # subclass of the builtin of the same name.
                print("Got connection error on {}".format(current_date))
                print("Will retry in {} seconds".format(backoff_time))
                time.sleep(backoff_time)
        # Add each processed date to the overall data
        # (getRainfallData returns None for days it could not parse).
        if weather_data is not None:
            data[station].append(weather_data)
    # Finally combine all of the individual days and output to CSV for analysis.
    # NOTE(review): the output file name is a placeholder; the original write
    # call is missing from this snippet -- adjust the path as needed.
    if data[station]:
        pd.concat(data[station]).to_csv("{}_weather.csv".format(station))