Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/318.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 在 Python 的 Beautiful Soup 和 pandas 中填充缺失的日期_Python_Python 3.x_Pandas_Web Scraping_Beautifulsoup - Fatal编程技术网

Python 在 Python 的 Beautiful Soup 和 pandas 中填充缺失的日期

Python 在蟒蛇靓汤和熊猫中填充缺失的枣,python,python-3.x,pandas,web-scraping,beautifulsoup,Python,Python 3.x,Pandas,Web Scraping,Beautifulsoup,我有这个网站,我从那里刮数据作为CSV文件。我勉强算出了日期和价格。然而,日期是周格式,我需要将其转换为日期格式,如5个工作日的每日价格。(周一至周六)。我用蟒蛇、熊猫和漂亮的汤做了这个。 从urllib.request导入urlopen from urllib.error import HTTPError from urllib.error import URLError from bs4 import BeautifulSoup from pandas import DataFrame i

我有这个网站,我从中抓取数据并保存为 CSV 文件。我已经设法抓取到了日期和价格。然而,日期是按周给出的,我需要把它转换成按天的日期格式,即 5 个工作日的每日价格(周一至周六)。我使用 Python、pandas 和 Beautiful Soup 完成了这些。 from urllib.request import urlopen

from urllib.error import HTTPError 
from urllib.error import URLError
from bs4 import BeautifulSoup
from pandas import DataFrame
import csv
import pandas as pd 
from urllib.request import urlopen
try:
    html = urlopen("https://www.eia.gov/dnav/ng/hist/rngwhhdD.htm")
except HTTPError as e:
    print(e)
except URLError:
    print("Server down or incorrect domain")
else:


我不完全清楚你们想要什么样的日期,但我把它们都提取出来,并称之为开始日期和结束日期

在:

输出:


您的实际代码为每一行创建一个列表,为每个单元格创建一个列表,但这两者并不匹配。 下面的脚本搜索表(它是唯一具有属性summary的表)并在每行(tr)上循环。然后从Week列(td类B6)中获取“to”之前的第一部分,并将其转换为datetime。 对于每个单元格(td类B3),它将获得价格(或空字符串),设置日期并递增日期

from urllib.error import HTTPError 
from urllib.error import URLError
from bs4 import BeautifulSoup
from pandas import DataFrame
import csv
import pandas as pd 
from urllib.request import urlopen
import datetime

# Download the price-history page, expand each weekly table row into one
# (date, price) pair per daily cell, and save the result as "Gas Price.csv".
res = None
try:
    html = urlopen("https://www.eia.gov/dnav/ng/hist/rngwhhdD.htm")
except HTTPError as e:
    print(e)
except URLError:
    print("Server down or incorrect domain")
else:
    # Close the HTTP response as soon as the markup has been parsed.
    with html:
        res = BeautifulSoup(html.read(), "html5lib")

# A failed download has already been reported above; stop here instead of
# crashing with a NameError on the first use of `res`.
if res is None:
    raise SystemExit(1)

# The data table is identified by carrying a "summary" attribute; when
# several tables match, the last one wins.
table = None
for t in res.find_all("table"):
    if "summary" in t.attrs:
        table = t
if table is None:
    raise SystemExit

price_list = []
date_list = []

# Skip the first <tr>, then walk every remaining row of the table.
for row in table.find_all("tr")[1:]:
    cells = row.find_all("td")
    # Only rows whose first cell has class "B6" (the week label) hold data.
    # Rows with no <td> at all, or with an unclassed first cell, are skipped.
    if not cells or "B6" not in (cells[0].get("class") or []):
        continue

    # Week label looks like "1997 Jan- 6 to Jan-10": keep the part before
    # " to " and drop inner spaces so it parses as "%Y%b-%d".
    d = cells[0].get_text().split(" to ")[0].strip().replace(" ", "")
    date = datetime.datetime.strptime(d, "%Y%b-%d")

    for cell in cells:
        # Price cells have class "B3"; a <td> without any class is ignored
        # rather than raising TypeError on `in None`.
        if "B3" not in (cell.get("class") or []):
            continue
        price = cell.get_text().strip()
        # Empty and "NA" cells become an empty string in the output.
        price = "" if price in ("", "NA") else float(price)
        price_list.append(price)
        date_list.append(date)
        # Consecutive price cells in a row are consecutive calendar days.
        date = date + datetime.timedelta(days=1)

# Both lists are appended in lockstep, so they always have equal length.
df = pd.DataFrame({'Date': date_list, 'Price': price_list})
print(df)
df.to_csv(r"Gas Price.csv", index=False, header=True)
# Parse the downloaded page; `html` is the response object opened earlier.
res = BeautifulSoup(html.read(), "html5lib")

# Gather the text of every element matched by the price class filter, then
# echo each value.
price = res.findAll(class_=["tbody", "td", "B3"])
price_list = [tag.getText() for tag in price]
for price_tag in price_list:
    print(price_tag)

# Same again for the elements matched by the week-label class filter.
date = res.findAll(class_=["tbody", "td", "B6"])
date_list = [tag.getText() for tag in date]
for date_tag in date_list:
    print(date_tag)

# Put the two lists side by side as columns, show the frame and save it.
frames = [pd.DataFrame({'Date': date_list}), pd.DataFrame({'Price': price_list})]
df = pd.concat(frames, axis=1)
print(df)
df.to_csv("Gas Price.csv", index=False, header=True)
# Demo: turn a "YYYY Mon-DD to Mon-DD" week label into real start/end dates.
df = pd.DataFrame({'Date': ['1997 Jan- 6 to Jan-10', '1997 Jan-13 to Jan-17'], 'Price': [3.80, 5.00] })

# Capture the four-digit year, then remove it from the label. regex=True is
# required: pandas >= 2.0 treats the pattern as a literal string by default,
# which would leave the year in place.
df['Temp_Year'] = df.Date.str.extract(r'((?:19|20)\d\d)')
df['Temp_Date'] = df.Date.str.replace(r'((?:19|20)\d\d)', '', regex=True)

# What remains is " Mon-DD to Mon-DD": split it into the two endpoints.
df[['Start Date', 'End Date']] = df.Temp_Date.str.split('to', expand=True)

# Re-attach the year to each endpoint (inner spaces removed) and parse with an
# explicit format so the result does not depend on format guessing.
df['Start Date'] = pd.to_datetime(df['Temp_Year'] + ' ' + df['Start Date'].str.replace(" ", ""), format='%Y %b-%d')
df['End Date'] = pd.to_datetime(df['Temp_Year'] + ' ' + df['End Date'].str.replace(" ", ""), format='%Y %b-%d')

# drop() returns a new frame; assign it so the helper columns really go away.
df = df.drop(['Temp_Year', 'Temp_Date'], axis=1)
|   | Date                  | Price | Start Date | End Date   |
|---|-----------------------|-------|------------|------------|
| 0 | 1997 Jan- 6 to Jan-10 | 3.8   | 1997-01-06 | 1997-01-10 |
| 1 | 1997 Jan-13 to Jan-17 | 5.0   | 1997-01-13 | 1997-01-17 |
from urllib.error import HTTPError 
from urllib.error import URLError
from bs4 import BeautifulSoup
from pandas import DataFrame
import csv
import pandas as pd 
from urllib.request import urlopen
import datetime

# Download the price-history page, expand each weekly table row into one
# (date, price) pair per daily cell, and save the result as "Gas Price.csv".
res = None
try:
    html = urlopen("https://www.eia.gov/dnav/ng/hist/rngwhhdD.htm")
except HTTPError as e:
    print(e)
except URLError:
    print("Server down or incorrect domain")
else:
    # Close the HTTP response as soon as the markup has been parsed.
    with html:
        res = BeautifulSoup(html.read(), "html5lib")

# A failed download has already been reported above; stop here instead of
# crashing with a NameError on the first use of `res`.
if res is None:
    raise SystemExit(1)

# The data table is identified by carrying a "summary" attribute; when
# several tables match, the last one wins.
table = None
for t in res.find_all("table"):
    if "summary" in t.attrs:
        table = t
if table is None:
    raise SystemExit

price_list = []
date_list = []

# Skip the first <tr>, then walk every remaining row of the table.
for row in table.find_all("tr")[1:]:
    cells = row.find_all("td")
    # Only rows whose first cell has class "B6" (the week label) hold data.
    # Rows with no <td> at all, or with an unclassed first cell, are skipped.
    if not cells or "B6" not in (cells[0].get("class") or []):
        continue

    # Week label looks like "1997 Jan- 6 to Jan-10": keep the part before
    # " to " and drop inner spaces so it parses as "%Y%b-%d".
    d = cells[0].get_text().split(" to ")[0].strip().replace(" ", "")
    date = datetime.datetime.strptime(d, "%Y%b-%d")

    for cell in cells:
        # Price cells have class "B3"; a <td> without any class is ignored
        # rather than raising TypeError on `in None`.
        if "B3" not in (cell.get("class") or []):
            continue
        price = cell.get_text().strip()
        # Empty and "NA" cells become an empty string in the output.
        price = "" if price in ("", "NA") else float(price)
        price_list.append(price)
        date_list.append(date)
        # Consecutive price cells in a row are consecutive calendar days.
        date = date + datetime.timedelta(days=1)

# Both lists are appended in lockstep, so they always have equal length.
df = pd.DataFrame({'Date': date_list, 'Price': price_list})
print(df)
df.to_csv(r"Gas Price.csv", index=False, header=True)