Python应用于日期列
我试图对一个数据帧分组并对每一组应用一个自定义函数,但是我的 date 列要么不断损坏(全部变成 NaT),要么每个元素都被加上了方括号。有人知道我做错了什么吗?(标签:python)
import datetime
import random
import string

import numpy as np
import pandas as pd
# This is the custom function I use
def summarise_dummy(x):
    """Pick the ``date`` and ``y`` values of the rows where ``groups_2`` is maximal.

    Parameters
    ----------
    x : pandas.DataFrame
        One group of the frame, with ``groups_2``, ``date`` and ``y`` columns.

    Returns
    -------
    pandas.Series
        Object-dtype Series indexed by ``'date'`` and ``'y'``.  Each entry is
        itself a Series, because a boolean-mask selection returns a Series
        (one element per matching row) rather than a scalar.
    """
    d = {}
    # Boolean mask of the rows whose groups_2 equals the group's maximum.
    date_index = x['groups_2'] == x['groups_2'].max()
    d['date'] = x['date'][date_index]  # do something with date
    d['y'] = x['y'][date_index]  # do something with y
    return pd.Series(d, index=['date', 'y'])  # return a series
# Generate some dummy data: ten daily dates starting ten days ago, random y values
todays_date = datetime.datetime.now().date()
date = pd.date_range(todays_date - datetime.timedelta(10), periods=10, freq='D')
columns = ['y']
data = [random.randint(0, 10) for _ in range(10)]
df = pd.DataFrame(data, columns=columns)
df['date'] = date
# string.letters only exists on Python 2; string.ascii_letters works on Python 3 too.
random.choice(string.ascii_letters)
df['date'] = pd.to_datetime(df['date'])
df['groups_1'] = list(np.random.choice(list(string.ascii_lowercase[0:5]), 10))
df['groups_2'] = list(np.random.choice(list(string.ascii_lowercase[0:2]), 10))
# ***
#df.loc[:,'date'] = df.loc[:,'date'].dt.strftime('%Y-%m-%d')
# Apply the function for each group_1
grouped = df.groupby(['groups_1'])
summarised = grouped.apply(summarise_dummy)
# Upon inspecting the date column, the values are all NaT. However, if you uncomment *** (above)
# and re-run, dates are returned?
summarised['date']
# But when I finally run with *** un-commented and convert my output to JSON, date has []'s in its series
summarised_json = summarised.to_json(orient='records')
您希望得到什么样的最终输出?如果在 `def summarise_dummy(x)` 内把 `pd.Series` 改为 `pd.DataFrame`,并把 date 和 y 设置为列,该函数是否能满足您的需求?
import numpy as np
import string
import random
import pandas as pd
import datetime
# This is the custom function I use
def summarise_dummy(x):
    """Return the ``date`` and ``y`` values of the rows where ``groups_2`` is maximal.

    Parameters
    ----------
    x : pandas.DataFrame
        One group of the frame, with ``groups_2``, ``date`` and ``y`` columns.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``date`` and ``y`` holding one row per matching
        input row; the original row labels are kept as the index.
    """
    d = {}
    # Boolean mask of the rows whose groups_2 equals the group's maximum.
    date_index = x['groups_2'] == max(x['groups_2'])
    d['date'] = x['date'][date_index]  # do something with date
    d['y'] = x['y'][date_index]  # do something with y
    return pd.DataFrame(d, columns=['date', 'y'])  # return a DataFrame
# Build the dummy frame: ten consecutive daily timestamps starting ten days ago
start_day = datetime.datetime.now().date() - datetime.timedelta(days=10)
date = pd.date_range(start_day, periods=10, freq='D')
print(date)
columns = ['y']
data = [random.randint(0, 10) for _ in range(10)]
df = pd.DataFrame(data, columns=columns)
df['date'] = date
random.choice(string.ascii_letters)  # result is discarded
# Random group labels: groups_1 drawn from a-e, groups_2 drawn from a-b
group_1_labels = list(string.ascii_lowercase[:5])
group_2_labels = list(string.ascii_lowercase[:2])
df['groups_1'] = list(np.random.choice(group_1_labels, 10))
df['groups_2'] = list(np.random.choice(group_2_labels, 10))
# Store the dates as 'YYYY-MM-DD' strings instead of datetimes
df['date'] = df['date'].dt.strftime('%Y-%m-%d')
print(df)
# Run the summary function once per groups_1 value
grouped = df.groupby(['groups_1'])
summarised = grouped.apply(summarise_dummy)
print(summarised)
# Serialise the summary as a JSON list with one object per row
summarised_json = summarised.to_json(orient='records')
print(summarised_json)
调用 apply 之后:
date y
groups_1
a 9 2018-08-21 0
b 6 2018-08-18 7
c 4 2018-08-16 0
7 2018-08-19 5
8 2018-08-20 1
d 1 2018-08-13 6
3 2018-08-15 8
e 5 2018-08-17 1
调用 to_json 之后:
[{"date":"2018-08-21","y":0},{"date":"2018-08-18","y":7},{"date":"2018-08-16","y":0},{"date":"2018-08-19","y":5},{"date":"2018-08-20","y":1},{"date":"2018-08-13","y":6},{"date":"2018-08-15","y":8},{"date":"2018-08-17","y":1}]
此外,您还可以使用 `orient` 参数来配置 JSON 的输出格式。