使用Python将连续日期分组

使用Python将连续日期分组,python,python-2.7,Python,Python 2.7,给定: dates = [ datetime(2014, 10, 11), datetime(2014, 10, 1), datetime(2014, 10, 2), datetime(2014, 10, 3), datetime(2014, 10, 5), datetime(2014, 10, 5), datetime(2014, 10, 6), datetime(2014, 10, 22), datetime(2014, 10, 20),

给定:

# Sample input: deliberately unsorted, with 5 Oct and 6 Oct listed twice.
# Each row below holds dates that were adjacent in the original listing.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1), datetime(2014, 10, 2), datetime(2014, 10, 3),
    datetime(2014, 10, 5), datetime(2014, 10, 5), datetime(2014, 10, 6),
    datetime(2014, 10, 22), datetime(2014, 10, 20), datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7), datetime(2014, 10, 6),
]

# Desired grouping: a [first, last] pair for each run of consecutive days,
# and the bare datetime for a day that has no neighbour.
expect = [
    [datetime(2014, 10, 1), datetime(2014, 10, 3)],
    [datetime(2014, 10, 5), datetime(2014, 10, 7)],
    datetime(2014, 10, 9),
    datetime(2014, 10, 11),
    [datetime(2014, 10, 20), datetime(2014, 10, 22)],
]
from datetime import datetime, timedelta

def parse_date_ranges(dates):
    """Group dates into runs of consecutive days.

    Args:
        dates: Iterable of ``datetime`` values, in any order and possibly
            containing duplicates. The argument itself is left untouched.

    Returns:
        ``False`` when there is nothing to group (empty or ``None`` input).
        Otherwise a list in ascending order with one entry per run:
        ``[first, last]`` for a run covering several days, or the bare
        value for a date with no neighbour exactly one day away.
    """
    # Sort a copy: list.sort() would reorder the caller's list in place and
    # would reject tuples, sets and other non-list iterables.
    ordered = sorted(dates or ())
    if not ordered:
        return False

    one_day = timedelta(days=1)
    result = []
    start = end = ordered[0]

    for current in ordered[1:]:
        if current == end:
            # Duplicate of the value that currently closes the run.
            continue
        if current - one_day == end:
            # Exactly one day later: extend the current run.
            end = current
        else:
            # Gap: close the current run and open a new one.
            result.append([start, end] if start != end else start)
            start = end = current

    # Close the run that was still open when the input ran out.
    result.append([start, end] if start != end else start)
    return result
预期输出:

# Sample input: deliberately unsorted, with 5 Oct and 6 Oct listed twice.
# Each row below holds dates that were adjacent in the original listing.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1), datetime(2014, 10, 2), datetime(2014, 10, 3),
    datetime(2014, 10, 5), datetime(2014, 10, 5), datetime(2014, 10, 6),
    datetime(2014, 10, 22), datetime(2014, 10, 20), datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7), datetime(2014, 10, 6),
]

# Desired grouping: a [first, last] pair for each run of consecutive days,
# and the bare datetime for a day that has no neighbour.
expect = [
    [datetime(2014, 10, 1), datetime(2014, 10, 3)],
    [datetime(2014, 10, 5), datetime(2014, 10, 7)],
    datetime(2014, 10, 9),
    datetime(2014, 10, 11),
    [datetime(2014, 10, 20), datetime(2014, 10, 22)],
]
from datetime import datetime, timedelta

def parse_date_ranges(dates):
    """Group dates into runs of consecutive days.

    Args:
        dates: Iterable of ``datetime`` values, in any order and possibly
            containing duplicates. The argument itself is left untouched.

    Returns:
        ``False`` when there is nothing to group (empty or ``None`` input).
        Otherwise a list in ascending order with one entry per run:
        ``[first, last]`` for a run covering several days, or the bare
        value for a date with no neighbour exactly one day away.
    """
    # Sort a copy: list.sort() would reorder the caller's list in place and
    # would reject tuples, sets and other non-list iterables.
    ordered = sorted(dates or ())
    if not ordered:
        return False

    one_day = timedelta(days=1)
    result = []
    start = end = ordered[0]

    for current in ordered[1:]:
        if current == end:
            # Duplicate of the value that currently closes the run.
            continue
        if current - one_day == end:
            # Exactly one day later: extend the current run.
            end = current
        else:
            # Gap: close the current run and open a new one.
            result.append([start, end] if start != end else start)
            start = end = current

    # Close the run that was still open when the input ran out.
    result.append([start, end] if start != end else start)
    return result
用Python实现:

# Sample input: deliberately unsorted, with 5 Oct and 6 Oct listed twice.
# Each row below holds dates that were adjacent in the original listing.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1), datetime(2014, 10, 2), datetime(2014, 10, 3),
    datetime(2014, 10, 5), datetime(2014, 10, 5), datetime(2014, 10, 6),
    datetime(2014, 10, 22), datetime(2014, 10, 20), datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7), datetime(2014, 10, 6),
]

# Desired grouping: a [first, last] pair for each run of consecutive days,
# and the bare datetime for a day that has no neighbour.
expect = [
    [datetime(2014, 10, 1), datetime(2014, 10, 3)],
    [datetime(2014, 10, 5), datetime(2014, 10, 7)],
    datetime(2014, 10, 9),
    datetime(2014, 10, 11),
    [datetime(2014, 10, 20), datetime(2014, 10, 22)],
]
from datetime import datetime, timedelta

def parse_date_ranges(dates):
    """Group dates into runs of consecutive days.

    Args:
        dates: Iterable of ``datetime`` values, in any order and possibly
            containing duplicates. The argument itself is left untouched.

    Returns:
        ``False`` when there is nothing to group (empty or ``None`` input).
        Otherwise a list in ascending order with one entry per run:
        ``[first, last]`` for a run covering several days, or the bare
        value for a date with no neighbour exactly one day away.
    """
    # Sort a copy: list.sort() would reorder the caller's list in place and
    # would reject tuples, sets and other non-list iterables.
    ordered = sorted(dates or ())
    if not ordered:
        return False

    one_day = timedelta(days=1)
    result = []
    start = end = ordered[0]

    for current in ordered[1:]:
        if current == end:
            # Duplicate of the value that currently closes the run.
            continue
        if current - one_day == end:
            # Exactly one day later: extend the current run.
            end = current
        else:
            # Gap: close the current run and open a new one.
            result.append([start, end] if start != end else start)
            start = end = current

    # Close the run that was still open when the input ran out.
    result.append([start, end] if start != end else start)
    return result
有关更多测试,请参阅

问题


这个实现是可行的,但我对 Python 还是新手,所以想知道是否有更好的方法来解决这个问题?还是说这样已经足够好了?

我为您编写了另一个解决方案,并写了一些注释来解释代码

from datetime import datetime, timedelta

# Sample input: unsorted and containing duplicates on purpose.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1),
    datetime(2014, 10, 2),
    datetime(2014, 10, 3),
    datetime(2014, 10, 5),
    datetime(2014, 10, 5),
    datetime(2014, 10, 6),
    datetime(2014, 10, 22),
    datetime(2014, 10, 20),
    datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7),
    datetime(2014, 10, 6)
]
# Remove duplicates, and sort the dates ascending
sorted_dates = sorted(set(dates))
# Set initial first and last element as the current element
first, last = sorted_dates[0], sorted_dates[0]
# Collected runs: a 1-tuple for an isolated date, (first, last) for a range.
date_ranges = []

# Loop over the sorted list from the second value
for d in sorted_dates[1:]:
    # Check if the current date is exactly one day later than the current
    # "last" date
    if d - last != timedelta(days=1):
        # Gap found: close the current run and start a new one at d.
        date_ranges.append((first,) if first == last else (first, last))
        first, last = d, d
    else:
        last = d

# Handle last element: the run still open after the loop ends.
date_ranges.append((first,) if first == last else (first, last))

for dt_pair in date_ranges:
    # Function-call form is valid on Python 3 and prints the same text on
    # Python 2, where the parentheses merely group the single tuple.
    print(dt_pair)
输出:

(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
(datetime.datetime(2014, 10, 9, 0, 0),)
(datetime.datetime(2014, 10, 11, 0, 0),)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
我喜欢:

结果:

(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
(1, 3)
(5, 7)
9
11
(20, 22)

通过改写一个标题类似的问题的答案,使其适用于 datetime 对象:

def parse_date_ranges(dates):
    """Collapse dates into runs of consecutive calendar days.

    Duplicates are dropped and the values are sorted before grouping. Two
    values belong to the same run when their ``toordinal()`` results differ
    by exactly one.

    Returns ``False`` for a falsy ``dates`` argument; otherwise a list with
    ``[first, last]`` for every multi-day run and the bare value for every
    isolated day.
    """

    def _runs(values):
        ordered = iter(sorted(set(values)))

        start = end = next(ordered)
        for current in ordered:
            if current.toordinal() - end.toordinal() != 1:
                # Gap: emit the finished run. `start is end` holds only
                # while the run still consists of its first element.
                yield start if start is end else [start, end]
                start = current
            end = current

        # Emit the run that was still open when the values ran out.
        yield start if start is end else [start, end]

    if not dates:
        return False
    return list(_runs(dates))
改写为使用 datetime 对象的版本。它能处理含重复项和未排序的输入,并且兼容 Python 3:

import itertools
from datetime import datetime, timedelta

def datetimes_to_ranges(iterable):
    """Yield runs of values spaced exactly one day apart.

    The input is de-duplicated and sorted first. Each run is yielded as a
    ``(first, last)`` tuple, or as the bare value when the run has a single
    member.
    """

    def _anchor(indexed):
        # Subtracting the position (in days) from the value gives the same
        # result for every member of a run of day-by-day neighbours.
        position, moment = indexed
        return moment - timedelta(days=position)

    unique_sorted = sorted(set(iterable))
    for _, members in itertools.groupby(enumerate(unique_sorted), _anchor):
        moments = [moment for _, moment in members]
        if len(moments) > 1:
            yield moments[0], moments[-1]
        else:
            yield moments[0]
例如:

>>> for i in datetimes_to_ranges(dates): i
... 
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))

为什么 9 和 11 不在一起?这真的不是一个好问题。我们可以帮助您修复不起作用的代码,但要求别人写出比您现有的可运行代码更好的代码,或者保证不存在更好的写法,在这里并不合适。这个问题可能更适合代码审查网站,但我也不确定那里是否合适。不过,你可能想看看这个,这是我在帮助有类似问题的人之后写的。(他试图使用 itertools,结果卡住了,所以我展示了各种摆脱困境的方法。至于这到底是不是最好的解决方案,则是一个更主观、更难回答的问题……)9 .. (10) .. 11,中间隔着 10,所以它们并不相邻。我之前不知道代码审查网站,谢谢你的提示。