
Python: Why does the 'Index' object have no attribute 'month_name'? (The code stopped working for no reason after working fine)

Tags: python, pandas, dataframe, datetime, python-datetime

So, my code was working a few minutes ago, but for some reason, even though it is the same code, it stopped working and now gives me "'Index' object has no attribute 'month_name'" on this line:

for i, month in enumerate(feature_df.index.month_name().unique(), 3):
  feature_df[month] = (feature_df.index.month == i+1).astype(int)
Note: the odd formatting of some things is because I am using Colab, which prints them automatically.
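A quick way to see what is going on (just a diagnostic sketch, assuming the notebook state built below) is to check what kind of index feature_df actually has, since month_name() only exists on a DatetimeIndex:

print(type(feature_df.index))   # a plain Index here, not a DatetimeIndex
print(feature_df.index.dtype)   # object, because the index holds datetime.date values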

"""

data_urls = {
  "2014": "https://sitewebbixi.s3.amazonaws.com/uploads/docs/biximontrealrentals2014-f040e0.zip",
  "2015": "https://sitewebbixi.s3.amazonaws.com/uploads/docs/biximontrealrentals2015-69fdf0.zip",
  "2016": "https://sitewebbixi.s3.amazonaws.com/uploads/docs/biximontrealrentals2016-912f00.zip",
  "2017": "https://sitewebbixi.s3.amazonaws.com/uploads/docs/biximontrealrentals2017-d4d086.zip",
  "2018": "https://sitewebbixi.s3.amazonaws.com/uploads/docs/biximontrealrentals2018-96034e.zip",
  "2019": "https://sitewebbixi.s3.amazonaws.com/uploads/docs/biximontrealrentals2019-33ea73.zip",
}

"""# Load the data"""

"""
Load all the data

We will need:
- the requests package to surf the web, 
- the io package to read the data stream from the response,
- the zipfile package to manipulate the archive.
"""
import io
import pandas as pd
import requests
import zipfile


df = None
for year, url in data_urls.items():
  print("Processing {}".format(year))
  # Load the url
  response = requests.get(url)
  # Read the archive from the response
  archive = zipfile.ZipFile(io.BytesIO(response.content))
  # Loop over all the files in the archive
  for file in archive.namelist():
    # Check that we are looking at one of the files we want
    if not archive.getinfo(file).is_dir() and "Station" not in file:
      print("Loading data from: {}".format(file))
      # We will load the start_date column only to save on memory use
      try:
        current_length = len(df)
        df = df.append(
          pd.read_csv(archive.open(file), usecols=["start_date"]),
          ignore_index=True, 
        )
      except:
        current_length = 0
        df = pd.read_csv(archive.open(file), usecols=["start_date"])
      print(" > {} rows processed".format(len(df) - current_length))
  response.close()
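# Note: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# A minimal sketch of the same loading loop written with pd.concat instead
# (collect the pieces in a list, then concatenate once), assuming the same
# data_urls dict and imports as above:
#
# frames = []
# for year, url in data_urls.items():
#     response = requests.get(url)
#     archive = zipfile.ZipFile(io.BytesIO(response.content))
#     for file in archive.namelist():
#         if not archive.getinfo(file).is_dir() and "Station" not in file:
#             frames.append(pd.read_csv(archive.open(file), usecols=["start_date"]))
#     response.close()
# df = pd.concat(frames, ignore_index=True)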

"""# Convert to datetime"""

#This converts the start_date column to datetime
df["start_date"] = pd.to_datetime(df["start_date"])

df.describe()

df.info()

"""#Adding values that will help with the project"""

#This adds values for each date (without the hour)
df["date"] = df["start_date"].dt.date

#This adds values for each year from 2014 to 2019 to the dataframe
df["year"] = df["start_date"].dt.year

#This adds values for each day of the week to the dataframe
df["dayoftheweek"] = df["start_date"].dt.dayofweek

#This adds values for each day of the year to the dataframe
df["dayoftheyear"] = df["start_date"].dt.dayofyear

#This adds values for each month to the dataframe 
df["month"] = df["start_date"].dt.month

#This adds values for each week of the year to the dataframe
df["week"] = df["start_date"].dt.week

#This adds values for each hour (in a day) to the dataframe 
df["houroftheday"] = df["start_date"].dt.hour

"""# Visualizations to see the trends"""

#Get simple dataframes for each type of data in order to graph them
year_df = df["year"].value_counts(normalize=True).sort_index()
dayoftheweek_df = df["dayoftheweek"].value_counts(normalize=True).sort_index()
dayoftheyear_df = df["dayoftheyear"].value_counts(normalize=True).sort_index()
week_df = df["week"].value_counts(normalize=True).sort_index()
month_df = df["month"].value_counts(normalize=True).sort_index()
houroftheday_df = df["houroftheday"].value_counts(normalize=True).sort_index()
date_df = df["date"].value_counts(normalize=True).sort_index()

"""##Trends for each **year**"""

import matplotlib.pyplot as plt
plt.figure(figsize=[16,9], dpi=300)
plt.bar(year_df.index, year_df.values)
plt.show()

"""Usage goes higher each year, so that needs to be taken into account.

##Trends for each **day of the week**
"""

plt.figure(figsize=[16,9], dpi=300)
plt.bar(dayoftheweek_df.index, dayoftheweek_df.values)
plt.show()

"""In the graph above, 0 stands for Monday, 1 for Tuesday, so on and so forth.<br> So, BIXI usage is smaller during Saturday and Sunday. That seems to be a factor.

##Trends for each **month**
"""

plt.figure(figsize=[16,9], dpi=300)
plt.bar(month_df.index, month_df.values)
plt.show()

"""The trend is lower in the months where BIXI ends/starts and when it gets colder. Usage peaks in summer.

##Trends for each **day of the year**
"""

plt.figure(figsize=[16,9], dpi=300)
plt.bar(dayoftheyear_df.index, dayoftheyear_df.values)
plt.show()

"""The trend is similar to the months except, that on certain weekends (it's every seven days) we have a few drops.

## Trends per date
"""

plt.figure(figsize=[16,9], dpi=300)
plt.bar(date_df.index, date_df.values)
plt.show()

"""This is essentially a combination of the trend per year and the trend per day of the year together, but it uses the actual dates.

##Trends for each **week**
"""

plt.figure(figsize=[16,9], dpi=300)
plt.bar(week_df.index, week_df.values)
plt.show()

"""This trend is pretty much the monthly trend but with weeks (we see how the changes in usage happen in more detail)

## Trends for each **hour**
"""

plt.figure(figsize=[16,9], dpi=300)
plt.bar(houroftheday_df.index, houroftheday_df.values)
plt.show()

"""As we can see here, there's more bixi usage around rush hour periods (8 am & 3-6 pm), so we need to take that into account, since it is an important factor.

#Building the model
To build the model, we first need to set the target vector, then create the feature matrix, and finally initialize the model that will make our predictions.

##Target vector
"""

# Number of trips per calendar date: this is what the model will predict
target_df = df.groupby("date").size()
target_df

"""## Feature matrix
Here we will create the feature matrix
"""

"""###Feature #1 : Day of the week"""

feature_df = pd.get_dummies(
    df.groupby("date").first(), columns=["dayoftheweek"], prefix="", prefix_sep=""
).loc[:, ["0", "1", "2", "3", "4", "5", "6"]]
feature_df

"""###Feature #2 : Month of the year"""

for i, month in enumerate(feature_df.index.month_name().unique(), 3):
  feature_df[month] = (feature_df.index.month == i+1).astype(int)

feature_df

"""##Dropping redundant columns"""

#This removes Monday and April; they will serve as our baseline (with an intercept, one dummy per group has to be dropped to avoid perfect collinearity)
feature_matrix = feature_df.drop(columns=["0", "April"])

"""## Choosing a model
The model will be initialized and then used to make some predictions
"""

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model

model.fit(feature_matrix , target_df)

model.coef_

model.intercept_

# Pair each coefficient with its feature name for readability
parameters = pd.Series(model.coef_, index=feature_matrix.columns)
parameters

"""#Predicting"""

#June 4th 2019 was a Tuesday
feature_matrix.loc["2019-06-04"]

feature_matrix.loc["2019-06-04"].values.reshape(1, -1)

model.predict(feature_matrix.loc["2019-06-04"].values.reshape(1, -1))[0]

"""##Quick plot for the initial features"""

plt.figure(figsize=[21,9], dpi=300)
plt.plot(feature_matrix.index, target_df, feature_matrix.index, model.predict(feature_matrix))
plt.show()

# 2016 only, since it seems to be the most accurate
from matplotlib import dates as mdate
plt.figure(figsize=[21,9], dpi=300)
plt.plot(feature_matrix.index, target_df, feature_matrix.index, model.predict(feature_matrix))
plt.xlim(left=mdate.datestr2num("2016-04-15"), right=mdate.datestr2num("2016-12-31"))
plt.show()

# 2019 only, since it seems to be the least accurate
from matplotlib import dates as mdate
plt.figure(figsize=[21,9], dpi=300)
plt.plot(feature_matrix.index, target_df, feature_matrix.index, model.predict(feature_matrix))
plt.xlim(left=mdate.datestr2num("2019-04-15"), right=mdate.datestr2num("2019-10-31"))
plt.show()

"""## Initial RMSE"""

import numpy as np
from sklearn.utils import resample
np.random.seed(1)
# Bootstrap: refit the model on 1000 resampled datasets and take the standard
# deviation of the coefficients as an uncertainty estimate for each parameter
uncertainty = np.std([model.fit(*resample(feature_matrix, target_df)).coef_ for i in range(1000)], 0)
params = pd.Series(uncertainty, index=feature_matrix.columns)
params
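# Note: the comprehension above refits `model` in place, so it is now fitted on the
# last bootstrap resample rather than on the full data. Refitting once on the full
# feature matrix keeps the "use all data" predictions below consistent (a small fix,
# not part of the original notebook):
# model.fit(feature_matrix, target_df)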

"""###Check RMSE for 2016 & 2019

####2019
"""

import math
from sklearn.metrics import mean_squared_error

#Use all data and then check 2019 predictions
error = mean_squared_error(model.predict(feature_matrix.loc[feature_matrix.index.year == 2019]), target_df.loc[target_df.index.year == 2019])
print(math.sqrt(error))

# Retrain while leaving out 2019, and then try to predict 2019
model.fit(feature_matrix.loc[feature_matrix.index.year < 2019], target_df.loc[target_df.index.year < 2019])
error = mean_squared_error(model.predict(feature_matrix.loc[feature_matrix.index.year == 2019]), target_df.loc[target_df.index.year == 2019])
print(math.sqrt(error))

"""####2016"""

import math
from sklearn.metrics import mean_squared_error

#Use all data and then check 2016 predictions
error = mean_squared_error(model.predict(feature_matrix.loc[feature_matrix.index.year == 2016]), target_df.loc[target_df.index.year == 2016])
print(math.sqrt(error))

# Retrain while leaving out 2016, and then try to predict 2016
model.fit(feature_matrix.loc[feature_matrix.index.year < 2016], target_df.loc[target_df.index.year < 2016])
error = mean_squared_error(model.predict(feature_matrix.loc[feature_matrix.index.year == 2016]), target_df.loc[target_df.index.year == 2016])
print(math.sqrt(error))

"""As the graph showed, 2016 is currently more accurate than 2019

#Feature #3: Day of the year"""
# Workaround for the error above: converting the index values (plain datetime.date
# objects) to pandas Timestamps makes month_name() available again.
for i, month in enumerate(pd.Series(feature_df.index).map(pd.Timestamp).dt.month_name().unique(), 3):
    # 1 for the rows that fall in this month, 0 otherwise
    x = (pd.Series(feature_df.index).map(pd.Timestamp).dt.month == (i + 1)).astype(int)
    x.index = feature_df.index  # align with feature_df before assigning the new column
    feature_df[month] = x
feature_df
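The error itself comes from the type of the index: df["date"] = df["start_date"].dt.date stores plain datetime.date objects, so the index produced by groupby("date") is an object Index rather than a DatetimeIndex, and only the latter has month_name(). Re-running cells in a different order in Colab can change the index type, which would explain the same cell working on one run and failing on the next. The loop above works around the problem by converting each index value to a Timestamp; a shorter, roughly equivalent sketch (assuming the same feature_df) converts the whole index once with pd.to_datetime:

months = pd.to_datetime(feature_df.index)  # DatetimeIndex, so month_name() is available
for i, month in enumerate(months.month_name().unique(), 3):
    feature_df[month] = (months.month == i + 1).astype(int)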