Python应用于日期列

Python应用于日期列,python,Python,我试图在一个数据帧上循环并应用一个customer函数,但是我的date列要么不断损坏,要么给每个元素添加括号 有人知道我做错了什么吗 import numpy as np import string import random # This is a the custom function I use def summarise_dummy(x): d = {} date_index = x['groups_2'] == max(x['groups_2']) d[

我想对一个 DataFrame 按组循环并应用一个自定义函数,但我的 date 列要么被破坏(全部变成 NaT),要么每个元素都被加上了方括号

有人知道我做错了什么吗

import datetime
import random
import string

import numpy as np
import pandas as pd

# This is the custom function I use

def summarise_dummy(x):
    """Collect the ``date`` and ``y`` values of the rows with the largest ``groups_2``.

    Args:
        x: The rows of one group; in this script it is passed in by
            ``groupby(...).apply``. Must provide ``groups_2``, ``date`` and
            ``y`` columns.

    Returns:
        A ``pd.Series`` indexed by ``'date'`` and ``'y'``. Each entry is
        itself the boolean-mask selection of that column (a Series holding
        every matching row), not a single scalar value.
    """
    top_group = max(x['groups_2'])
    is_top = x['groups_2'] == top_group
    # One masked column selection per output field.
    picked = {column: x[column][is_top] for column in ('date', 'y')}

    return pd.Series(picked, index=['date', 'y'])

# Generate some dummy data: 10 consecutive daily dates plus a random integer
# column `y` and two random single-letter grouping columns.

todays_date = datetime.datetime.now().date()
date = pd.date_range(todays_date - datetime.timedelta(10), periods=10, freq='D')
columns = ['y']
data = [random.randint(0, 10) for i in range(0, 10)]
df = pd.DataFrame(data, columns=columns)
df['date'] = date
# `string.letters` only exists in Python 2 (AttributeError on Python 3);
# `string.ascii_letters` is available on both.
random.choice(string.ascii_letters)
df['date'] = pd.to_datetime(df['date'])
df['groups_1'] = list(np.random.choice(list(string.ascii_lowercase[0:5]), 10))
df['groups_2'] = list(np.random.choice(list(string.ascii_lowercase[0:2]), 10))

# ***
#df.loc[:,'date'] = df.loc[:,'date'].dt.strftime('%Y-%m-%d')

# Apply the function for each group_1
grouped = df.groupby(['groups_1'])
summarised = grouped.apply(summarise_dummy)

# Upon inspecting the date column, they are all NaT. However if you uncomment *** (above)
# and re-run, dates are returned?
summarised['date']

# But when I finally run with *** un-commented and convert my output to a json, date has []'s in its series
summarised_json = summarised.to_json(orient='records')

您希望获得什么样的最终输出?

如果在 `def summarise_dummy(x)` 中把 `pd.Series` 改为 `pd.DataFrame`,并将 `date` 和 `y` 设为列,该函数是否就能正常工作?

import numpy as np
import string
import random
import pandas as pd
import datetime

# This is the custom function I use
def summarise_dummy(x):
    """Return the ``date`` and ``y`` columns of the rows with the largest ``groups_2``.

    Args:
        x: The rows of one group; in this script it is passed in by
            ``groupby(...).apply``. Must provide ``groups_2``, ``date`` and
            ``y`` columns.

    Returns:
        A ``pd.DataFrame`` with columns ``['date', 'y']`` holding one row per
        input row whose ``groups_2`` equals the group's maximum ``groups_2``.
    """
    top_group = max(x['groups_2'])
    is_top = x['groups_2'] == top_group
    # One masked column selection per output column.
    picked = {column: x[column][is_top] for column in ('date', 'y')}

    return pd.DataFrame(picked, columns=['date', 'y'])

# Generate some dummy data: 10 consecutive daily dates plus a random integer
# column `y` and two random single-letter grouping columns.
date = pd.date_range(datetime.datetime.now().date() - datetime.timedelta(10), periods=10, freq='D')
print(date)
columns = ['y']
data = [random.randint(0, 10) for _ in range(10)]
df = pd.DataFrame(data, columns=columns)
df['date'] = date
df['groups_1'] = list(np.random.choice(list(string.ascii_lowercase[0:5]), 10))
df['groups_2'] = list(np.random.choice(list(string.ascii_lowercase[0:2]), 10))

# Store the dates as 'YYYY-MM-DD' strings before grouping.
df['date'] = df['date'].dt.strftime('%Y-%m-%d')
print(df)

# Apply the function for each group_1
grouped = df.groupby(['groups_1'])
summarised = grouped.apply(summarise_dummy)
print(summarised)

# Serialise the summarised rows as a JSON array with one object per row.
summarised_json = summarised.to_json(orient='records')

print(summarised_json)
apply 之后:

                  date  y
groups_1                 
a        9  2018-08-21  0
b        6  2018-08-18  7
c        4  2018-08-16  0
         7  2018-08-19  5
         8  2018-08-20  1
d        1  2018-08-13  6
         3  2018-08-15  8
e        5  2018-08-17  1
`to_json` 之后:

[{"date":"2018-08-21","y":0},{"date":"2018-08-18","y":7},{"date":"2018-08-16","y":0},{"date":"2018-08-19","y":5},{"date":"2018-08-20","y":1},{"date":"2018-08-13","y":6},{"date":"2018-08-15","y":8},{"date":"2018-08-17","y":1}]
此外,您还可以使用 `orient` 参数来配置 JSON 的输出格式。