Python 在数据框中添加缺少的星期和时间
标签:python, python-2.7, pandas, datetime
我有一个 pandas 数据框,看起来像这样: 一天中的时间分为以下几个时段:凌晨(Early Morning)、上午(Morning)、下午(Afternoon)、傍晚(Evening)、深夜(Late Night)。目的是在两次观测之间补上缺失的星期几和时段,使数据保持连续、统一。例如,如果当前行是星期三凌晨(Early Morning),下一行是星期四上午(Morning),我想添加:
Wednesday Morning
Wednesday Afternoon
Wednesday Evening
Wednesday late night
Thursday Early Morning
作为中间的行。到目前为止,我一直尝试把星期几和时段分别转换成数字,然后相减,算出中间需要补多少天或多少个时段。
我想知道是否有更有效的方法进行这项工作。以下是我编写的代码:
# Ordinal lookup tables for the day of the week and the phase of the day.
# They replace the per-row if/elif chains with one vectorized ``map`` each.
week_index = {
    "MONDAY": 1,
    "TUESDAY": 2,
    "WEDNESDAY": 3,
    "THURSDAY": 4,
    "FRIDAY": 5,
    "SATURDAY": 6,
    "SUNDAY": 7,
}
day_index = {
    "EARLY MORNING": 1,
    "MORNING": 2,
    "AFTERNOON": 3,
    "EVENING": 4,
    "LATE NIGHT": 5,
}

dfMod = dfMod.reset_index(drop=True)

# Any value missing from the tables falls through to the last slot (7 / 5),
# exactly as the ``else`` branches of the original chains did.
dfMod['weekIndex'] = dfMod['day'].map(week_index).fillna(7)
dfMod['dayIndex'] = dfMod['timeType'].map(day_index).fillna(5)

# New columns must be created with item assignment: ``dfMod.leadWeek = ...``
# only sets a Python attribute, so the later ``dfMod['leadWeek']`` lookup
# would raise KeyError.
dfMod['leadWeek'] = dfMod.groupby('adId')['weekIndex'].shift(-1)
dfMod['leadDay'] = dfMod.groupby('adId')['dayIndex'].shift(-1)

# Distance, in days and in phases, to the next observation of the same ad.
dfMod['diffWeek'] = dfMod['leadWeek'] - dfMod['weekIndex']
dfMod['diffDay'] = dfMod['leadDay'] - dfMod['dayIndex']
dfMod.head()
这里有一个方法可以解决你的问题。这是一个继承自 `datetime.datetime` 的类,它提供了一些处理字符串的方法,以便在字符串与 `datetime` 之间相互转换。使用 `datetime` 的优点是,可以直接使用与它相关的各种现成方法。在本例中,我使用 `resample` 扩展数据框,并用所需数据填充空缺。
这里说明的另一件事是使用 `dict` 把一种值转换为另一种值。这种结构通常比层层堆叠的 `if` 语句更受欢迎。
代码:
import datetime as dt
class penta_datetime(dt.datetime):
    """A ``datetime`` whose day is cleaved into five equal 288-minute phases.

    Counting from midnight the phases are LATE NIGHT, EARLY MORNING,
    MORNING, AFTERNOON and EVENING.  A week is anchored on the Sunday on or
    before a given date; MONDAY..SUNDAY fall 1..7 days after that anchor.
    """

    # pandas frequency string that matches one phase of the day.
    pandas_period = '288min'
    # Length of a single phase (one fifth of a day).
    part_day = dt.timedelta(minutes=(24 * 60 / 5))
    # Midnight of the Sunday on or before the day this class was defined.
    base_date = dt.datetime.combine(
        dt.date.today(), dt.datetime.min.time()) - dt.timedelta(
        (dt.date.today().weekday() + 1) % 7)
    # Phase name -> offset of the phase start from midnight.
    day_index_offset = {
        "LATE NIGHT": part_day * 0,
        "EARLY MORNING": part_day * 1,
        "MORNING": part_day * 2,
        "AFTERNOON": part_day * 3,
        "EVENING": part_day * 4,
    }
    # Day name -> number of days after the week anchor (ISO weekday number).
    dow_index = {
        'MONDAY': 1,
        'TUESDAY': 2,
        'WEDNESDAY': 3,
        'THURSDAY': 4,
        'FRIDAY': 5,
        'SATURDAY': 6,
        'SUNDAY': 7,
    }

    @classmethod
    def to_week(cls, date):
        """Return midnight of the Sunday on or before ``date``.

        ``date`` must provide ``weekday()`` and be accepted by
        ``datetime.combine`` (a ``date`` or a ``datetime``).
        """
        anchor = cls.combine(date, dt.datetime.min.time()) - dt.timedelta(
            (date.weekday() + 1) % 7)
        # Before Python 3.8, arithmetic on a datetime subclass yields a plain
        # datetime; re-wrap so callers always receive ``cls``.
        return cls.from_datetime(anchor)

    @classmethod
    def from_strings(cls, dow, tod, week=None):
        """Build the instant for day name ``dow`` and phase name ``tod``.

        Both names are matched case-insensitively.  ``week`` is any date in
        the target week and defaults to the current moment.  Raises
        ``KeyError`` when either name is unknown.
        """
        if week is None:
            week = cls.now()
        moment = (cls.to_week(week) +
                  dt.timedelta(days=cls.dow_index[dow.upper()]) +
                  cls.day_index_offset[tod.upper()])
        # Same re-wrap as in to_week: keep the subclass on every Python.
        return cls.from_datetime(moment)

    @classmethod
    def from_datetime(cls, datetime):
        """Copy the date and time of any datetime-like object into ``cls``."""
        return cls.combine(datetime.date(), datetime.time())

    @property
    def phase_of_day(self):
        """Name of the phase of the day that contains this instant.

        Times that do not sit exactly on a phase boundary are floored to the
        start of their phase instead of raising ``KeyError``.
        """
        since_midnight = dt.timedelta(
            hours=self.hour, minutes=self.minute,
            seconds=self.second, microseconds=self.microsecond)
        slot = int(since_midnight.total_seconds() //
                   self.part_day.total_seconds())
        phase_start = (self.base_date + self.part_day * slot).time()
        return self.day_offset_index[phase_start]

    @property
    def dow_string(self):
        """Upper-case name of this instant's day of the week."""
        return self.dow_strings[self.isoweekday()]


# Reverse lookups.  They are attached after the class body because a
# comprehension inside the body cannot see sibling class attributes.
# Start-of-phase time -> phase name.
penta_datetime.day_offset_index = {
    (penta_datetime.base_date + offset).time(): phase
    for phase, offset in penta_datetime.day_index_offset.items()}
# ISO weekday number -> day name.
penta_datetime.dow_strings = {
    number: name for name, number in penta_datetime.dow_index.items()}
import pandas as pd
# Sample observations: one row per (ad, day of week, phase of day).
df = pd.DataFrame([
    [1, 'WEDNESDAY', 'LATE NIGHT'],
    [1, 'WEDNESDAY', 'EARLY MORNING'],
    [2, 'WEDNESDAY', 'EVENING'],
    [3, 'SATURDAY', 'MORNING'],
    [2, 'SATURDAY', 'AFTERNOON'],
], columns=['ad_id', 'day_of_week', 'time_of_day'])
print(df)


def convert_to_datetime(row):
    """Turn a row's day/phase strings into a timestamp in the current week."""
    return penta_datetime.from_strings(row.day_of_week, row.time_of_day)


# make a copy of the dataframe
ids = df.copy()

# convert the strings into timestamps; to_datetime guarantees a datetime64
# column so that the index below is a DatetimeIndex, which resample requires
ids['ts'] = pd.to_datetime(df.apply(convert_to_datetime, axis=1))

# set the timestamps as the index
ids.set_index(['ts'], inplace=True)

# resample to 5 times a day, and forward-fill the data into the holes
# (Resampler.pad() was removed from pandas; ffill() is its replacement)
ids = ids.resample(penta_datetime.pandas_period).ffill().reset_index()

# (optional) convert the strings to match extended timestamps
ids['time_of_day'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).phase_of_day)
ids['day_of_week'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).dow_string)
print(ids)
ad_id day_of_week time_of_day
0 1 WEDNESDAY LATE NIGHT
1 1 WEDNESDAY EARLY MORNING
2 2 WEDNESDAY EVENING
3 3 SATURDAY MORNING
4 2 SATURDAY AFTERNOON
ts ad_id day_of_week time_of_day
0 2017-03-15 00:00:00 1 WEDNESDAY LATE NIGHT
1 2017-03-15 04:48:00 1 WEDNESDAY EARLY MORNING
2 2017-03-15 09:36:00 1 WEDNESDAY MORNING
3 2017-03-15 14:24:00 1 WEDNESDAY AFTERNOON
4 2017-03-15 19:12:00 2 WEDNESDAY EVENING
5 2017-03-16 00:00:00 2 THURSDAY LATE NIGHT
6 2017-03-16 04:48:00 2 THURSDAY EARLY MORNING
7 2017-03-16 09:36:00 2 THURSDAY MORNING
8 2017-03-16 14:24:00 2 THURSDAY AFTERNOON
9 2017-03-16 19:12:00 2 THURSDAY EVENING
10 2017-03-17 00:00:00 2 FRIDAY LATE NIGHT
11 2017-03-17 04:48:00 2 FRIDAY EARLY MORNING
12 2017-03-17 09:36:00 2 FRIDAY MORNING
13 2017-03-17 14:24:00 2 FRIDAY AFTERNOON
14 2017-03-17 19:12:00 2 FRIDAY EVENING
15 2017-03-18 00:00:00 2 SATURDAY LATE NIGHT
16 2017-03-18 04:48:00 2 SATURDAY EARLY MORNING
17 2017-03-18 09:36:00 3 SATURDAY MORNING
18 2017-03-18 14:24:00 2 SATURDAY AFTERNOON
测试代码:
import datetime as dt
class penta_datetime(dt.datetime):
    """A ``datetime`` whose day is cleaved into five equal 288-minute phases.

    Counting from midnight the phases are LATE NIGHT, EARLY MORNING,
    MORNING, AFTERNOON and EVENING.  A week is anchored on the Sunday on or
    before a given date; MONDAY..SUNDAY fall 1..7 days after that anchor.
    """

    # pandas frequency string that matches one phase of the day.
    pandas_period = '288min'
    # Length of a single phase (one fifth of a day).
    part_day = dt.timedelta(minutes=(24 * 60 / 5))
    # Midnight of the Sunday on or before the day this class was defined.
    base_date = dt.datetime.combine(
        dt.date.today(), dt.datetime.min.time()) - dt.timedelta(
        (dt.date.today().weekday() + 1) % 7)
    # Phase name -> offset of the phase start from midnight.
    day_index_offset = {
        "LATE NIGHT": part_day * 0,
        "EARLY MORNING": part_day * 1,
        "MORNING": part_day * 2,
        "AFTERNOON": part_day * 3,
        "EVENING": part_day * 4,
    }
    # Day name -> number of days after the week anchor (ISO weekday number).
    dow_index = {
        'MONDAY': 1,
        'TUESDAY': 2,
        'WEDNESDAY': 3,
        'THURSDAY': 4,
        'FRIDAY': 5,
        'SATURDAY': 6,
        'SUNDAY': 7,
    }

    @classmethod
    def to_week(cls, date):
        """Return midnight of the Sunday on or before ``date``.

        ``date`` must provide ``weekday()`` and be accepted by
        ``datetime.combine`` (a ``date`` or a ``datetime``).
        """
        anchor = cls.combine(date, dt.datetime.min.time()) - dt.timedelta(
            (date.weekday() + 1) % 7)
        # Before Python 3.8, arithmetic on a datetime subclass yields a plain
        # datetime; re-wrap so callers always receive ``cls``.
        return cls.from_datetime(anchor)

    @classmethod
    def from_strings(cls, dow, tod, week=None):
        """Build the instant for day name ``dow`` and phase name ``tod``.

        Both names are matched case-insensitively.  ``week`` is any date in
        the target week and defaults to the current moment.  Raises
        ``KeyError`` when either name is unknown.
        """
        if week is None:
            week = cls.now()
        moment = (cls.to_week(week) +
                  dt.timedelta(days=cls.dow_index[dow.upper()]) +
                  cls.day_index_offset[tod.upper()])
        # Same re-wrap as in to_week: keep the subclass on every Python.
        return cls.from_datetime(moment)

    @classmethod
    def from_datetime(cls, datetime):
        """Copy the date and time of any datetime-like object into ``cls``."""
        return cls.combine(datetime.date(), datetime.time())

    @property
    def phase_of_day(self):
        """Name of the phase of the day that contains this instant.

        Times that do not sit exactly on a phase boundary are floored to the
        start of their phase instead of raising ``KeyError``.
        """
        since_midnight = dt.timedelta(
            hours=self.hour, minutes=self.minute,
            seconds=self.second, microseconds=self.microsecond)
        slot = int(since_midnight.total_seconds() //
                   self.part_day.total_seconds())
        phase_start = (self.base_date + self.part_day * slot).time()
        return self.day_offset_index[phase_start]

    @property
    def dow_string(self):
        """Upper-case name of this instant's day of the week."""
        return self.dow_strings[self.isoweekday()]


# Reverse lookups.  They are attached after the class body because a
# comprehension inside the body cannot see sibling class attributes.
# Start-of-phase time -> phase name.
penta_datetime.day_offset_index = {
    (penta_datetime.base_date + offset).time(): phase
    for phase, offset in penta_datetime.day_index_offset.items()}
# ISO weekday number -> day name.
penta_datetime.dow_strings = {
    number: name for name, number in penta_datetime.dow_index.items()}
import pandas as pd
# Sample observations: one row per (ad, day of week, phase of day).
df = pd.DataFrame([
    [1, 'WEDNESDAY', 'LATE NIGHT'],
    [1, 'WEDNESDAY', 'EARLY MORNING'],
    [2, 'WEDNESDAY', 'EVENING'],
    [3, 'SATURDAY', 'MORNING'],
    [2, 'SATURDAY', 'AFTERNOON'],
], columns=['ad_id', 'day_of_week', 'time_of_day'])
print(df)


def convert_to_datetime(row):
    """Turn a row's day/phase strings into a timestamp in the current week."""
    return penta_datetime.from_strings(row.day_of_week, row.time_of_day)


# make a copy of the dataframe
ids = df.copy()

# convert the strings into timestamps; to_datetime guarantees a datetime64
# column so that the index below is a DatetimeIndex, which resample requires
ids['ts'] = pd.to_datetime(df.apply(convert_to_datetime, axis=1))

# set the timestamps as the index
ids.set_index(['ts'], inplace=True)

# resample to 5 times a day, and forward-fill the data into the holes
# (Resampler.pad() was removed from pandas; ffill() is its replacement)
ids = ids.resample(penta_datetime.pandas_period).ffill().reset_index()

# (optional) convert the strings to match extended timestamps
ids['time_of_day'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).phase_of_day)
ids['day_of_week'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).dow_string)
print(ids)
ad_id day_of_week time_of_day
0 1 WEDNESDAY LATE NIGHT
1 1 WEDNESDAY EARLY MORNING
2 2 WEDNESDAY EVENING
3 3 SATURDAY MORNING
4 2 SATURDAY AFTERNOON
ts ad_id day_of_week time_of_day
0 2017-03-15 00:00:00 1 WEDNESDAY LATE NIGHT
1 2017-03-15 04:48:00 1 WEDNESDAY EARLY MORNING
2 2017-03-15 09:36:00 1 WEDNESDAY MORNING
3 2017-03-15 14:24:00 1 WEDNESDAY AFTERNOON
4 2017-03-15 19:12:00 2 WEDNESDAY EVENING
5 2017-03-16 00:00:00 2 THURSDAY LATE NIGHT
6 2017-03-16 04:48:00 2 THURSDAY EARLY MORNING
7 2017-03-16 09:36:00 2 THURSDAY MORNING
8 2017-03-16 14:24:00 2 THURSDAY AFTERNOON
9 2017-03-16 19:12:00 2 THURSDAY EVENING
10 2017-03-17 00:00:00 2 FRIDAY LATE NIGHT
11 2017-03-17 04:48:00 2 FRIDAY EARLY MORNING
12 2017-03-17 09:36:00 2 FRIDAY MORNING
13 2017-03-17 14:24:00 2 FRIDAY AFTERNOON
14 2017-03-17 19:12:00 2 FRIDAY EVENING
15 2017-03-18 00:00:00 2 SATURDAY LATE NIGHT
16 2017-03-18 04:48:00 2 SATURDAY EARLY MORNING
17 2017-03-18 09:36:00 3 SATURDAY MORNING
18 2017-03-18 14:24:00 2 SATURDAY AFTERNOON
结果:
import datetime as dt
class penta_datetime(dt.datetime):
    """A ``datetime`` whose day is cleaved into five equal 288-minute phases.

    Counting from midnight the phases are LATE NIGHT, EARLY MORNING,
    MORNING, AFTERNOON and EVENING.  A week is anchored on the Sunday on or
    before a given date; MONDAY..SUNDAY fall 1..7 days after that anchor.
    """

    # pandas frequency string that matches one phase of the day.
    pandas_period = '288min'
    # Length of a single phase (one fifth of a day).
    part_day = dt.timedelta(minutes=(24 * 60 / 5))
    # Midnight of the Sunday on or before the day this class was defined.
    base_date = dt.datetime.combine(
        dt.date.today(), dt.datetime.min.time()) - dt.timedelta(
        (dt.date.today().weekday() + 1) % 7)
    # Phase name -> offset of the phase start from midnight.
    day_index_offset = {
        "LATE NIGHT": part_day * 0,
        "EARLY MORNING": part_day * 1,
        "MORNING": part_day * 2,
        "AFTERNOON": part_day * 3,
        "EVENING": part_day * 4,
    }
    # Day name -> number of days after the week anchor (ISO weekday number).
    dow_index = {
        'MONDAY': 1,
        'TUESDAY': 2,
        'WEDNESDAY': 3,
        'THURSDAY': 4,
        'FRIDAY': 5,
        'SATURDAY': 6,
        'SUNDAY': 7,
    }

    @classmethod
    def to_week(cls, date):
        """Return midnight of the Sunday on or before ``date``.

        ``date`` must provide ``weekday()`` and be accepted by
        ``datetime.combine`` (a ``date`` or a ``datetime``).
        """
        anchor = cls.combine(date, dt.datetime.min.time()) - dt.timedelta(
            (date.weekday() + 1) % 7)
        # Before Python 3.8, arithmetic on a datetime subclass yields a plain
        # datetime; re-wrap so callers always receive ``cls``.
        return cls.from_datetime(anchor)

    @classmethod
    def from_strings(cls, dow, tod, week=None):
        """Build the instant for day name ``dow`` and phase name ``tod``.

        Both names are matched case-insensitively.  ``week`` is any date in
        the target week and defaults to the current moment.  Raises
        ``KeyError`` when either name is unknown.
        """
        if week is None:
            week = cls.now()
        moment = (cls.to_week(week) +
                  dt.timedelta(days=cls.dow_index[dow.upper()]) +
                  cls.day_index_offset[tod.upper()])
        # Same re-wrap as in to_week: keep the subclass on every Python.
        return cls.from_datetime(moment)

    @classmethod
    def from_datetime(cls, datetime):
        """Copy the date and time of any datetime-like object into ``cls``."""
        return cls.combine(datetime.date(), datetime.time())

    @property
    def phase_of_day(self):
        """Name of the phase of the day that contains this instant.

        Times that do not sit exactly on a phase boundary are floored to the
        start of their phase instead of raising ``KeyError``.
        """
        since_midnight = dt.timedelta(
            hours=self.hour, minutes=self.minute,
            seconds=self.second, microseconds=self.microsecond)
        slot = int(since_midnight.total_seconds() //
                   self.part_day.total_seconds())
        phase_start = (self.base_date + self.part_day * slot).time()
        return self.day_offset_index[phase_start]

    @property
    def dow_string(self):
        """Upper-case name of this instant's day of the week."""
        return self.dow_strings[self.isoweekday()]


# Reverse lookups.  They are attached after the class body because a
# comprehension inside the body cannot see sibling class attributes.
# Start-of-phase time -> phase name.
penta_datetime.day_offset_index = {
    (penta_datetime.base_date + offset).time(): phase
    for phase, offset in penta_datetime.day_index_offset.items()}
# ISO weekday number -> day name.
penta_datetime.dow_strings = {
    number: name for name, number in penta_datetime.dow_index.items()}
import pandas as pd
# Sample observations: one row per (ad, day of week, phase of day).
df = pd.DataFrame([
    [1, 'WEDNESDAY', 'LATE NIGHT'],
    [1, 'WEDNESDAY', 'EARLY MORNING'],
    [2, 'WEDNESDAY', 'EVENING'],
    [3, 'SATURDAY', 'MORNING'],
    [2, 'SATURDAY', 'AFTERNOON'],
], columns=['ad_id', 'day_of_week', 'time_of_day'])
print(df)


def convert_to_datetime(row):
    """Turn a row's day/phase strings into a timestamp in the current week."""
    return penta_datetime.from_strings(row.day_of_week, row.time_of_day)


# make a copy of the dataframe
ids = df.copy()

# convert the strings into timestamps; to_datetime guarantees a datetime64
# column so that the index below is a DatetimeIndex, which resample requires
ids['ts'] = pd.to_datetime(df.apply(convert_to_datetime, axis=1))

# set the timestamps as the index
ids.set_index(['ts'], inplace=True)

# resample to 5 times a day, and forward-fill the data into the holes
# (Resampler.pad() was removed from pandas; ffill() is its replacement)
ids = ids.resample(penta_datetime.pandas_period).ffill().reset_index()

# (optional) convert the strings to match extended timestamps
ids['time_of_day'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).phase_of_day)
ids['day_of_week'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).dow_string)
print(ids)
ad_id day_of_week time_of_day
0 1 WEDNESDAY LATE NIGHT
1 1 WEDNESDAY EARLY MORNING
2 2 WEDNESDAY EVENING
3 3 SATURDAY MORNING
4 2 SATURDAY AFTERNOON
ts ad_id day_of_week time_of_day
0 2017-03-15 00:00:00 1 WEDNESDAY LATE NIGHT
1 2017-03-15 04:48:00 1 WEDNESDAY EARLY MORNING
2 2017-03-15 09:36:00 1 WEDNESDAY MORNING
3 2017-03-15 14:24:00 1 WEDNESDAY AFTERNOON
4 2017-03-15 19:12:00 2 WEDNESDAY EVENING
5 2017-03-16 00:00:00 2 THURSDAY LATE NIGHT
6 2017-03-16 04:48:00 2 THURSDAY EARLY MORNING
7 2017-03-16 09:36:00 2 THURSDAY MORNING
8 2017-03-16 14:24:00 2 THURSDAY AFTERNOON
9 2017-03-16 19:12:00 2 THURSDAY EVENING
10 2017-03-17 00:00:00 2 FRIDAY LATE NIGHT
11 2017-03-17 04:48:00 2 FRIDAY EARLY MORNING
12 2017-03-17 09:36:00 2 FRIDAY MORNING
13 2017-03-17 14:24:00 2 FRIDAY AFTERNOON
14 2017-03-17 19:12:00 2 FRIDAY EVENING
15 2017-03-18 00:00:00 2 SATURDAY LATE NIGHT
16 2017-03-18 04:48:00 2 SATURDAY EARLY MORNING
17 2017-03-18 09:36:00 3 SATURDAY MORNING
18 2017-03-18 14:24:00 2 SATURDAY AFTERNOON
注意:
import datetime as dt
class penta_datetime(dt.datetime):
    """A ``datetime`` whose day is cleaved into five equal 288-minute phases.

    Counting from midnight the phases are LATE NIGHT, EARLY MORNING,
    MORNING, AFTERNOON and EVENING.  A week is anchored on the Sunday on or
    before a given date; MONDAY..SUNDAY fall 1..7 days after that anchor.
    """

    # pandas frequency string that matches one phase of the day.
    pandas_period = '288min'
    # Length of a single phase (one fifth of a day).
    part_day = dt.timedelta(minutes=(24 * 60 / 5))
    # Midnight of the Sunday on or before the day this class was defined.
    base_date = dt.datetime.combine(
        dt.date.today(), dt.datetime.min.time()) - dt.timedelta(
        (dt.date.today().weekday() + 1) % 7)
    # Phase name -> offset of the phase start from midnight.
    day_index_offset = {
        "LATE NIGHT": part_day * 0,
        "EARLY MORNING": part_day * 1,
        "MORNING": part_day * 2,
        "AFTERNOON": part_day * 3,
        "EVENING": part_day * 4,
    }
    # Day name -> number of days after the week anchor (ISO weekday number).
    dow_index = {
        'MONDAY': 1,
        'TUESDAY': 2,
        'WEDNESDAY': 3,
        'THURSDAY': 4,
        'FRIDAY': 5,
        'SATURDAY': 6,
        'SUNDAY': 7,
    }

    @classmethod
    def to_week(cls, date):
        """Return midnight of the Sunday on or before ``date``.

        ``date`` must provide ``weekday()`` and be accepted by
        ``datetime.combine`` (a ``date`` or a ``datetime``).
        """
        anchor = cls.combine(date, dt.datetime.min.time()) - dt.timedelta(
            (date.weekday() + 1) % 7)
        # Before Python 3.8, arithmetic on a datetime subclass yields a plain
        # datetime; re-wrap so callers always receive ``cls``.
        return cls.from_datetime(anchor)

    @classmethod
    def from_strings(cls, dow, tod, week=None):
        """Build the instant for day name ``dow`` and phase name ``tod``.

        Both names are matched case-insensitively.  ``week`` is any date in
        the target week and defaults to the current moment.  Raises
        ``KeyError`` when either name is unknown.
        """
        if week is None:
            week = cls.now()
        moment = (cls.to_week(week) +
                  dt.timedelta(days=cls.dow_index[dow.upper()]) +
                  cls.day_index_offset[tod.upper()])
        # Same re-wrap as in to_week: keep the subclass on every Python.
        return cls.from_datetime(moment)

    @classmethod
    def from_datetime(cls, datetime):
        """Copy the date and time of any datetime-like object into ``cls``."""
        return cls.combine(datetime.date(), datetime.time())

    @property
    def phase_of_day(self):
        """Name of the phase of the day that contains this instant.

        Times that do not sit exactly on a phase boundary are floored to the
        start of their phase instead of raising ``KeyError``.
        """
        since_midnight = dt.timedelta(
            hours=self.hour, minutes=self.minute,
            seconds=self.second, microseconds=self.microsecond)
        slot = int(since_midnight.total_seconds() //
                   self.part_day.total_seconds())
        phase_start = (self.base_date + self.part_day * slot).time()
        return self.day_offset_index[phase_start]

    @property
    def dow_string(self):
        """Upper-case name of this instant's day of the week."""
        return self.dow_strings[self.isoweekday()]


# Reverse lookups.  They are attached after the class body because a
# comprehension inside the body cannot see sibling class attributes.
# Start-of-phase time -> phase name.
penta_datetime.day_offset_index = {
    (penta_datetime.base_date + offset).time(): phase
    for phase, offset in penta_datetime.day_index_offset.items()}
# ISO weekday number -> day name.
penta_datetime.dow_strings = {
    number: name for name, number in penta_datetime.dow_index.items()}
import pandas as pd
# Sample observations: one row per (ad, day of week, phase of day).
df = pd.DataFrame([
    [1, 'WEDNESDAY', 'LATE NIGHT'],
    [1, 'WEDNESDAY', 'EARLY MORNING'],
    [2, 'WEDNESDAY', 'EVENING'],
    [3, 'SATURDAY', 'MORNING'],
    [2, 'SATURDAY', 'AFTERNOON'],
], columns=['ad_id', 'day_of_week', 'time_of_day'])
print(df)


def convert_to_datetime(row):
    """Turn a row's day/phase strings into a timestamp in the current week."""
    return penta_datetime.from_strings(row.day_of_week, row.time_of_day)


# make a copy of the dataframe
ids = df.copy()

# convert the strings into timestamps; to_datetime guarantees a datetime64
# column so that the index below is a DatetimeIndex, which resample requires
ids['ts'] = pd.to_datetime(df.apply(convert_to_datetime, axis=1))

# set the timestamps as the index
ids.set_index(['ts'], inplace=True)

# resample to 5 times a day, and forward-fill the data into the holes
# (Resampler.pad() was removed from pandas; ffill() is its replacement)
ids = ids.resample(penta_datetime.pandas_period).ffill().reset_index()

# (optional) convert the strings to match extended timestamps
ids['time_of_day'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).phase_of_day)
ids['day_of_week'] = ids['ts'].apply(
    lambda ts: penta_datetime.from_datetime(ts).dow_string)
print(ids)
ad_id day_of_week time_of_day
0 1 WEDNESDAY LATE NIGHT
1 1 WEDNESDAY EARLY MORNING
2 2 WEDNESDAY EVENING
3 3 SATURDAY MORNING
4 2 SATURDAY AFTERNOON
ts ad_id day_of_week time_of_day
0 2017-03-15 00:00:00 1 WEDNESDAY LATE NIGHT
1 2017-03-15 04:48:00 1 WEDNESDAY EARLY MORNING
2 2017-03-15 09:36:00 1 WEDNESDAY MORNING
3 2017-03-15 14:24:00 1 WEDNESDAY AFTERNOON
4 2017-03-15 19:12:00 2 WEDNESDAY EVENING
5 2017-03-16 00:00:00 2 THURSDAY LATE NIGHT
6 2017-03-16 04:48:00 2 THURSDAY EARLY MORNING
7 2017-03-16 09:36:00 2 THURSDAY MORNING
8 2017-03-16 14:24:00 2 THURSDAY AFTERNOON
9 2017-03-16 19:12:00 2 THURSDAY EVENING
10 2017-03-17 00:00:00 2 FRIDAY LATE NIGHT
11 2017-03-17 04:48:00 2 FRIDAY EARLY MORNING
12 2017-03-17 09:36:00 2 FRIDAY MORNING
13 2017-03-17 14:24:00 2 FRIDAY AFTERNOON
14 2017-03-17 19:12:00 2 FRIDAY EVENING
15 2017-03-18 00:00:00 2 SATURDAY LATE NIGHT
16 2017-03-18 04:48:00 2 SATURDAY EARLY MORNING
17 2017-03-18 09:36:00 3 SATURDAY MORNING
18 2017-03-18 14:24:00 2 SATURDAY AFTERNOON
您没有说清楚跨周的变化该如何处理,所以这部分这里暂未处理,留给读者作为练习。提问时最好给出您尝试过的代码和期望的输出,并附上一段用于构造测试数据框的代码,方便我们使用。