使用Python将连续日期分组
标签:python、python-2.7
给定:
# Sample input: deliberately unsorted and containing duplicates
# (2014-10-05 and 2014-10-06 each appear twice).
dates = [
datetime(2014, 10, 11),
datetime(2014, 10, 1),
datetime(2014, 10, 2),
datetime(2014, 10, 3),
datetime(2014, 10, 5),
datetime(2014, 10, 5),
datetime(2014, 10, 6),
datetime(2014, 10, 22),
datetime(2014, 10, 20),
datetime(2014, 10, 21),
datetime(2014, 10, 9),
datetime(2014, 10, 7),
datetime(2014, 10, 6)
]
# Expected grouping of the sample input: a run of consecutive days is a
# [first, last] list, an isolated day is the bare datetime.
expect = [
[datetime(2014, 10, 1), datetime(2014, 10, 3)],
[datetime(2014, 10, 5), datetime(2014, 10, 7)],
datetime(2014, 10, 9),
datetime(2014, 10, 11),
[datetime(2014, 10, 20), datetime(2014, 10, 22)]
]
from datetime import datetime, timedelta
def parse_date_ranges(dates):
    """Collapse a list of datetimes into runs of consecutive days.

    The input may be unsorted and may contain duplicates.  It is not
    modified: the grouping works on a sorted copy.

    Args:
        dates: list of ``datetime`` objects.

    Returns:
        ``False`` when ``dates`` is empty or ``None``.  Otherwise a list in
        ascending order in which every run of values spaced exactly one day
        apart is a ``[first, last]`` list and every isolated value is the
        bare ``datetime``.
    """
    if not dates:
        return False

    # Work on a sorted copy so the caller's list keeps its original order.
    ordered = sorted(dates)
    one_day = timedelta(days=1)

    def as_entry(first, last):
        # A run consisting of a single day is reported as the bare datetime.
        return [first, last] if first != last else first

    result = []
    start = end = ordered[0]
    for current in ordered[1:]:
        if current == end:
            # Duplicate of the value that currently ends the run: skip it.
            continue
        if current - one_day == end:
            # Exactly one day later: extend the current run.
            end = current
        else:
            # Gap: close the current run and start a new one.
            result.append(as_entry(start, end))
            start = end = current
    # Close the final run.
    result.append(as_entry(start, end))
    return result
预期输出:
# Sample input: deliberately unsorted and containing duplicates
# (2014-10-05 and 2014-10-06 each appear twice).
dates = [
datetime(2014, 10, 11),
datetime(2014, 10, 1),
datetime(2014, 10, 2),
datetime(2014, 10, 3),
datetime(2014, 10, 5),
datetime(2014, 10, 5),
datetime(2014, 10, 6),
datetime(2014, 10, 22),
datetime(2014, 10, 20),
datetime(2014, 10, 21),
datetime(2014, 10, 9),
datetime(2014, 10, 7),
datetime(2014, 10, 6)
]
# Expected grouping of the sample input: a run of consecutive days is a
# [first, last] list, an isolated day is the bare datetime.
expect = [
[datetime(2014, 10, 1), datetime(2014, 10, 3)],
[datetime(2014, 10, 5), datetime(2014, 10, 7)],
datetime(2014, 10, 9),
datetime(2014, 10, 11),
[datetime(2014, 10, 20), datetime(2014, 10, 22)]
]
from datetime import datetime, timedelta
def parse_date_ranges(dates):
    """Collapse a list of datetimes into runs of consecutive days.

    The input may be unsorted and may contain duplicates.  It is not
    modified: the grouping works on a sorted copy.

    Args:
        dates: list of ``datetime`` objects.

    Returns:
        ``False`` when ``dates`` is empty or ``None``.  Otherwise a list in
        ascending order in which every run of values spaced exactly one day
        apart is a ``[first, last]`` list and every isolated value is the
        bare ``datetime``.
    """
    if not dates:
        return False

    # Work on a sorted copy so the caller's list keeps its original order.
    ordered = sorted(dates)
    one_day = timedelta(days=1)

    def as_entry(first, last):
        # A run consisting of a single day is reported as the bare datetime.
        return [first, last] if first != last else first

    result = []
    start = end = ordered[0]
    for current in ordered[1:]:
        if current == end:
            # Duplicate of the value that currently ends the run: skip it.
            continue
        if current - one_day == end:
            # Exactly one day later: extend the current run.
            end = current
        else:
            # Gap: close the current run and start a new one.
            result.append(as_entry(start, end))
            start = end = current
    # Close the final run.
    result.append(as_entry(start, end))
    return result
用Python实现:
# Sample input: deliberately unsorted and containing duplicates
# (2014-10-05 and 2014-10-06 each appear twice).
dates = [
datetime(2014, 10, 11),
datetime(2014, 10, 1),
datetime(2014, 10, 2),
datetime(2014, 10, 3),
datetime(2014, 10, 5),
datetime(2014, 10, 5),
datetime(2014, 10, 6),
datetime(2014, 10, 22),
datetime(2014, 10, 20),
datetime(2014, 10, 21),
datetime(2014, 10, 9),
datetime(2014, 10, 7),
datetime(2014, 10, 6)
]
# Expected grouping of the sample input: a run of consecutive days is a
# [first, last] list, an isolated day is the bare datetime.
expect = [
[datetime(2014, 10, 1), datetime(2014, 10, 3)],
[datetime(2014, 10, 5), datetime(2014, 10, 7)],
datetime(2014, 10, 9),
datetime(2014, 10, 11),
[datetime(2014, 10, 20), datetime(2014, 10, 22)]
]
from datetime import datetime, timedelta
def parse_date_ranges(dates):
    """Collapse a list of datetimes into runs of consecutive days.

    The input may be unsorted and may contain duplicates.  It is not
    modified: the grouping works on a sorted copy.

    Args:
        dates: list of ``datetime`` objects.

    Returns:
        ``False`` when ``dates`` is empty or ``None``.  Otherwise a list in
        ascending order in which every run of values spaced exactly one day
        apart is a ``[first, last]`` list and every isolated value is the
        bare ``datetime``.
    """
    if not dates:
        return False

    # Work on a sorted copy so the caller's list keeps its original order.
    ordered = sorted(dates)
    one_day = timedelta(days=1)

    def as_entry(first, last):
        # A run consisting of a single day is reported as the bare datetime.
        return [first, last] if first != last else first

    result = []
    start = end = ordered[0]
    for current in ordered[1:]:
        if current == end:
            # Duplicate of the value that currently ends the run: skip it.
            continue
        if current - one_day == end:
            # Exactly one day later: extend the current run.
            end = current
        else:
            # Gap: close the current run and start a new one.
            result.append(as_entry(start, end))
            start = end = current
    # Close the final run.
    result.append(as_entry(start, end))
    return result
有关更多测试,请参阅
问题
这个实现是可行的,但我对 Python 还是新手,所以我想知道是否有更好的方法来解决这个问题,还是说这样写就已经可以了?
我为您编写了另一个解决方案,并写了一些注释来解释代码:
from datetime import datetime, timedelta
# Sample input: deliberately unsorted and containing duplicates.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1),
    datetime(2014, 10, 2),
    datetime(2014, 10, 3),
    datetime(2014, 10, 5),
    datetime(2014, 10, 5),
    datetime(2014, 10, 6),
    datetime(2014, 10, 22),
    datetime(2014, 10, 20),
    datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7),
    datetime(2014, 10, 6),
]

# Remove duplicates and sort ascending so consecutive days become adjacent.
sorted_dates = sorted(set(dates))

# Each entry is a (first, last) tuple for a run of consecutive days, or a
# one-element (day,) tuple for an isolated day.
date_ranges = []

if sorted_dates:
    # The first value opens the first run.
    first = last = sorted_dates[0]

    # Walk the remaining values, extending or closing the current run.
    for d in sorted_dates[1:]:
        if d - last == timedelta(days=1):
            # Exactly one day after the current end: extend the run.
            last = d
            continue
        # Gap: store the finished run and start a new one at d.
        date_ranges.append((first,) if first == last else (first, last))
        first = last = d

    # Store the run that was still open when the input ended.
    date_ranges.append((first,) if first == last else (first, last))

# print(x) with a single argument gives the same output on Python 2 and 3.
for dt_pair in date_ranges:
    print(dt_pair)
输出:
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
(datetime.datetime(2014, 10, 9, 0, 0),)
(datetime.datetime(2014, 10, 11, 0, 0),)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
我喜欢:
结果:
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
(1, 3)
(5, 7)
9
11
(20, 22)
改编自一个标题类似的问题的答案,使其适用于 datetime 对象:
def parse_date_ranges(dates):
    """Group datetimes into runs of consecutive calendar days.

    Duplicates are removed and the values are sorted before grouping, so
    the input may be unsorted and non-unique.  Adjacency is decided by
    comparing ``toordinal()`` values.

    Args:
        dates: collection of ``datetime`` objects.

    Returns:
        ``False`` when ``dates`` is falsy (e.g. an empty list).  Otherwise
        a list in ascending order in which each run of consecutive days is
        a ``[first, last]`` list and each isolated day is the bare
        ``datetime``.
    """
    def group_consecutive(dates):
        dates_iter = iter(sorted(set(dates)))  # de-dup and sort
        # Only the two ends of the current run are needed, not its members.
        start = end = next(dates_iter)
        for d in dates_iter:
            if d.toordinal() - end.toordinal() == 1:  # consecutive?
                end = d
            else:  # [start, end] of range else singleton
                yield [start, end] if start != end else start
                start = end = d
        # Emit the run that was still open when the input ended.
        yield [start, end] if start != end else start

    return list(group_consecutive(dates)) if dates else False
改编为使用 datetime 对象的版本。它可以处理非唯一和未排序的输入,并且与 Python 3 兼容:
import itertools
from datetime import datetime, timedelta
def datetimes_to_ranges(iterable):
    """Yield runs of consecutive days found in an iterable of datetimes.

    The values are de-duplicated and sorted first, so the input may be
    unsorted and non-unique.

    Args:
        iterable: iterable of ``datetime`` objects.

    Yields:
        In ascending order, a ``(first, last)`` tuple for each run of
        values spaced exactly one day apart, or the bare ``datetime`` for
        a value that belongs to no longer run.
    """
    ordered = sorted(set(iterable))

    def run_key(indexed):
        # Subtracting the position (in days) from the value gives the same
        # result for every member of a run of consecutive days, so groupby
        # places them in one group.
        index, moment = indexed
        return moment - timedelta(days=index)

    for _, members in itertools.groupby(enumerate(ordered), run_key):
        members = list(members)
        first = members[0][1]
        last = members[-1][1]
        if len(members) == 1:
            yield first
        else:
            yield first, last
例如:
>>> for i in datetimes_to_ranges(dates): i
...
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
为什么 9 和 11 不在一起?
这真的不是一个好问题。我们可以帮助您修复不起作用的代码,但是要求别人写出比您已能正常工作的代码更好的代码,或者保证不存在更好的写法,在这里并不合适。这个问题放到 Code Review 可能更好,但我也不确定那里是否合适。不过,你可能想看看这个,这是我在帮助一位有类似问题的人之后写的。(他试图使用 itertools,结果卡住了,所以我展示了各种摆脱困境的方法。至于这到底是不是最好的解决方案,则是一个更主观、更难回答的问题……)
9 …(10)… 11,所以它们并不相邻。
我之前不知道 Code Review。谢谢你的提示。