Python 在一个范围内生成多组随机、不重叠的间隔_Python_Python 3.x_Algorithm_Random_Intervals

Python 在一个范围内生成多组随机、不重叠的间隔
python python-3.x algorithm random
Python 在一个范围内生成多组随机、不重叠的间隔,python,python-3.x,algorithm,random,intervals,Python,Python 3.x,Algorithm,Random,Intervals,在特定的整数范围内，我想生成n列表，每个列表由z非重叠的随机间隔组成，最小间隔宽度为w。应在单个此类列表中理解非重叠条件 a=0、b=100、n=4、z=3、w=5的示例： 1. [ [1, 17], [57, 83], [89, 98] ] 2. [ [5, 23], [42, 49], [60, 78] ] 3. [ [70, 76], [80, 89], [93, 99] ] 4. [ [20, 62], [67, 81], [82, 93] ] 目前，我使用numpy.linspace
在特定的整数范围 [a, b] 内，我想生成 n 个列表，每个列表由 z 个互不重叠的随机区间组成，区间的最小宽度为 w。“不重叠”这一条件只需在同一个列表内部成立。
a=0、b=100、n=4、z=3、w=5的示例：
1. [ [1, 17], [57, 83], [89, 98] ]
2. [ [5, 23], [42, 49], [60, 78] ]
3. [ [70, 76], [80, 89], [93, 99] ]
4. [ [20, 62], [67, 81], [82, 93] ]

目前，我使用 numpy.linspace 在 [a, b] 内生成等间距的值作为区间的左边界，然后为每个值引入一个小的随机变化。
在相邻的两个左边界之间，我尝试放置区间的右边界，同时遵守最小宽度要求。然而，我的方法在计算上非常昂贵。
在Python中实现我的目标最有效的方法是什么？一组时间间隔的一个变体（其他变体以相同的方式生成）。简单但不是很有效：
1.在a和b之间生成一组z值。在你的例子中，它是[x1，x2，x3]（按升序排序）
2.将其转换为区间列表：[[x1，x1]，[x2，x2]，[x3，x3]]
3.依次遍历每个区间：如果其下边界比上一个区间的上边界大1，则增大其上边界。否则，如果其上边界比下一个区间的下边界小1，则减小其下边界。如果两个条件都不满足，则向任意一侧扩展该区间。如果两个条件同时满足——很遗憾，运气不好，请从第1步重试。
4.重复步骤3，直到所有间隔最小宽度为W，并且在方法1-原始随机生成后的一些（随机数）次
这是一种低效但简单的方法：从 range(a, b) 中选取 z*2 个随机整数，对它们排序、两两配对，并检查每个区间的宽度是否都大于或等于 w。重复此过程 n 次。
请注意，当 z*w 接近 len(range(a, b)) 时，这种方法的效率会很低。我考虑过添加一个辅助函数来生成随机的第 n 个区间，并保证剩余的 z-n 个区间仍然放得下——即从 range(a, b - w*(z-n)) 中选取端点——但这样会带来一个问题：先选出的区间会偏向于更长。
代码：
def list_to_pairs(l):
    """Split ``l`` into consecutive chunks of two items.

    Chunks are produced by slicing, so an odd-length input ends with a
    one-element chunk and an empty input yields an empty list.
    """
    pairs = []
    for start in range(0, len(l), 2):
        pairs.append(l[start:start + 2])
    return pairs

def f(z, w, a, b):
    """Return ``z`` sorted, non-overlapping random intervals in ``range(a, b)``.

    Rejection sampling: draw ``2*z`` distinct integers from ``range(a, b)``,
    sort them, pair them up as ``[start, end]`` and retry until every pair
    satisfies ``end - start >= w``.

    Raises:
        ValueError: if the request cannot be satisfied, i.e. ``range(a, b)``
            holds fewer than ``z * (max(w, 1) + 1)`` integers.
    """
    # Endpoints are distinct, so each interval spans at least max(w, 1) + 1
    # integers. Without this check an infeasible request would loop forever.
    if z > 0 and z * (max(w, 1) + 1) > b - a:
        raise ValueError(
            f"cannot fit {z} intervals of width >= {w} in range({a}, {b})"
        )
    # Always sample at least once; a placeholder start value would be
    # returned as-is whenever w <= 0.
    while True:
        points = sorted(random.sample(range(a, b), 2 * z))
        intervals = [[points[k], points[k + 1]] for k in range(0, 2 * z, 2)]
        if all(end - start >= w for start, end in intervals):
            return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
 [[10, 32], [61, 66], [72, 81]],
 [[2, 31], [63, 68], [77, 87]],
 [[5, 20], [34, 55], [58, 86]]]

def positive_integers_with_sum(n, total):
    """Return ``n`` random non-negative integers that add up to ``total``.

    Despite the name, individual parts may be zero. ``n - 1`` cut points are
    drawn independently from ``[0, total]`` (repeats allowed) and sorted; the
    parts are the gaps between consecutive cut points, with 0 and ``total``
    acting as the outer boundaries.

    Raises:
        ValueError: if ``n`` or ``total`` is negative, or if ``n`` is 0 while
            ``total`` is not.
    """
    if n < 0 or total < 0:
        raise ValueError("n and total must be non-negative")
    if n == 0:
        if total:
            raise ValueError("cannot split a non-zero total into zero parts")
        return []
    cuts = sorted(random.randint(0, total) for _ in range(n - 1))
    bounds = [0] + cuts + [total]
    return [hi - lo for lo, hi in zip(bounds, bounds[1:])]

def f(z, w, a, b):
    """Return ``z`` random, non-overlapping intervals laid out left to right.

    The span ``b - a`` is cut into ``z`` consecutive slots of at least ``w``
    integers each; the slack ``(b - a) - z*w`` is distributed at random by
    ``positive_integers_with_sum``. One interval is then drawn inside every
    slot, so no retry loop is needed and intervals cannot overlap.

    Each interval is ``[start, end]`` with an inclusive ``end`` and covers at
    least ``w`` integers (``end - start >= w - 1``).

    Raises:
        ValueError: if ``z`` intervals of width ``w`` do not fit, i.e.
            ``z*w > b - a``.
    """
    if z <= 0:
        # Nothing to place.
        return []
    slack = (b - a) - z * w
    if slack < 0:
        raise ValueError(
            f"cannot fit {z} intervals of width {w} between {a} and {b}"
        )
    slot_sizes = [extra + w for extra in positive_integers_with_sum(z, slack)]
    intervals = []
    slot_start = a
    for size in slot_sizes:
        slot_end = slot_start + size  # exclusive end of this slot
        first = random.randint(slot_start, slot_end - w)
        # The exclusive stop is at least w past `first` and never earlier
        # than w before the end of the slot.
        stop = random.randint(max(first + w, slot_end - w), slot_end)
        intervals.append([first, stop - 1])
        slot_start = slot_end
    return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]]]

>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
 [[18, 23], [39, 44], [77, 97]],
 [[12, 31], [37, 53], [83, 95]],
 [[13, 46], [62, 87], [94, 100]]]

# One list of observed interval lengths per interval position.
rv = [[], [], []]

# Sample many interval lists and record, per position, the distance
# between the two endpoints of the interval at that position.
for _ in range(100000):
    t = f(3, 5, 0, 100)
    for lengths, (left, right) in zip(rv, t):
        lengths.append(abs(right - left))

>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])

输出：
def list_to_pairs(l):
    """Split ``l`` into consecutive chunks of two items.

    Chunks are produced by slicing, so an odd-length input ends with a
    one-element chunk and an empty input yields an empty list.
    """
    pairs = []
    for start in range(0, len(l), 2):
        pairs.append(l[start:start + 2])
    return pairs

def f(z, w, a, b):
    """Return ``z`` sorted, non-overlapping random intervals in ``range(a, b)``.

    Rejection sampling: draw ``2*z`` distinct integers from ``range(a, b)``,
    sort them, pair them up as ``[start, end]`` and retry until every pair
    satisfies ``end - start >= w``.

    Raises:
        ValueError: if the request cannot be satisfied, i.e. ``range(a, b)``
            holds fewer than ``z * (max(w, 1) + 1)`` integers.
    """
    # Endpoints are distinct, so each interval spans at least max(w, 1) + 1
    # integers. Without this check an infeasible request would loop forever.
    if z > 0 and z * (max(w, 1) + 1) > b - a:
        raise ValueError(
            f"cannot fit {z} intervals of width >= {w} in range({a}, {b})"
        )
    # Always sample at least once; a placeholder start value would be
    # returned as-is whenever w <= 0.
    while True:
        points = sorted(random.sample(range(a, b), 2 * z))
        intervals = [[points[k], points[k + 1]] for k in range(0, 2 * z, 2)]
        if all(end - start >= w for start, end in intervals):
            return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
 [[10, 32], [61, 66], [72, 81]],
 [[2, 31], [63, 68], [77, 87]],
 [[5, 20], [34, 55], [58, 86]]]

def positive_integers_with_sum(n, total):
    """Return ``n`` random non-negative integers that add up to ``total``.

    Despite the name, individual parts may be zero. ``n - 1`` cut points are
    drawn independently from ``[0, total]`` (repeats allowed) and sorted; the
    parts are the gaps between consecutive cut points, with 0 and ``total``
    acting as the outer boundaries.

    Raises:
        ValueError: if ``n`` or ``total`` is negative, or if ``n`` is 0 while
            ``total`` is not.
    """
    if n < 0 or total < 0:
        raise ValueError("n and total must be non-negative")
    if n == 0:
        if total:
            raise ValueError("cannot split a non-zero total into zero parts")
        return []
    cuts = sorted(random.randint(0, total) for _ in range(n - 1))
    bounds = [0] + cuts + [total]
    return [hi - lo for lo, hi in zip(bounds, bounds[1:])]

def f(z, w, a, b):
    """Return ``z`` random, non-overlapping intervals laid out left to right.

    The span ``b - a`` is cut into ``z`` consecutive slots of at least ``w``
    integers each; the slack ``(b - a) - z*w`` is distributed at random by
    ``positive_integers_with_sum``. One interval is then drawn inside every
    slot, so no retry loop is needed and intervals cannot overlap.

    Each interval is ``[start, end]`` with an inclusive ``end`` and covers at
    least ``w`` integers (``end - start >= w - 1``).

    Raises:
        ValueError: if ``z`` intervals of width ``w`` do not fit, i.e.
            ``z*w > b - a``.
    """
    if z <= 0:
        # Nothing to place.
        return []
    slack = (b - a) - z * w
    if slack < 0:
        raise ValueError(
            f"cannot fit {z} intervals of width {w} between {a} and {b}"
        )
    slot_sizes = [extra + w for extra in positive_integers_with_sum(z, slack)]
    intervals = []
    slot_start = a
    for size in slot_sizes:
        slot_end = slot_start + size  # exclusive end of this slot
        first = random.randint(slot_start, slot_end - w)
        # The exclusive stop is at least w past `first` and never earlier
        # than w before the end of the slot.
        stop = random.randint(max(first + w, slot_end - w), slot_end)
        intervals.append([first, stop - 1])
        slot_start = slot_end
    return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]]]

>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
 [[18, 23], [39, 44], [77, 97]],
 [[12, 31], [37, 53], [83, 95]],
 [[13, 46], [62, 87], [94, 100]]]

# One list of observed interval lengths per interval position.
rv = [[], [], []]

# Sample many interval lists and record, per position, the distance
# between the two endpoints of the interval at that position.
for _ in range(100000):
    t = f(3, 5, 0, 100)
    for lengths, (left, right) in zip(rv, t):
        lengths.append(abs(right - left))

>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])

方法2
@Peter O.概述了一种不依赖于随机选取间隔的方法，我在下面用一些小的逻辑变化对其进行了编码
代码：
def list_to_pairs(l):
    """Split ``l`` into consecutive chunks of two items.

    Chunks are produced by slicing, so an odd-length input ends with a
    one-element chunk and an empty input yields an empty list.
    """
    pairs = []
    for start in range(0, len(l), 2):
        pairs.append(l[start:start + 2])
    return pairs

def f(z, w, a, b):
    """Return ``z`` sorted, non-overlapping random intervals in ``range(a, b)``.

    Rejection sampling: draw ``2*z`` distinct integers from ``range(a, b)``,
    sort them, pair them up as ``[start, end]`` and retry until every pair
    satisfies ``end - start >= w``.

    Raises:
        ValueError: if the request cannot be satisfied, i.e. ``range(a, b)``
            holds fewer than ``z * (max(w, 1) + 1)`` integers.
    """
    # Endpoints are distinct, so each interval spans at least max(w, 1) + 1
    # integers. Without this check an infeasible request would loop forever.
    if z > 0 and z * (max(w, 1) + 1) > b - a:
        raise ValueError(
            f"cannot fit {z} intervals of width >= {w} in range({a}, {b})"
        )
    # Always sample at least once; a placeholder start value would be
    # returned as-is whenever w <= 0.
    while True:
        points = sorted(random.sample(range(a, b), 2 * z))
        intervals = [[points[k], points[k + 1]] for k in range(0, 2 * z, 2)]
        if all(end - start >= w for start, end in intervals):
            return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
 [[10, 32], [61, 66], [72, 81]],
 [[2, 31], [63, 68], [77, 87]],
 [[5, 20], [34, 55], [58, 86]]]

def positive_integers_with_sum(n, total):
    """Return ``n`` random non-negative integers that add up to ``total``.

    Despite the name, individual parts may be zero. ``n - 1`` cut points are
    drawn independently from ``[0, total]`` (repeats allowed) and sorted; the
    parts are the gaps between consecutive cut points, with 0 and ``total``
    acting as the outer boundaries.

    Raises:
        ValueError: if ``n`` or ``total`` is negative, or if ``n`` is 0 while
            ``total`` is not.
    """
    if n < 0 or total < 0:
        raise ValueError("n and total must be non-negative")
    if n == 0:
        if total:
            raise ValueError("cannot split a non-zero total into zero parts")
        return []
    cuts = sorted(random.randint(0, total) for _ in range(n - 1))
    bounds = [0] + cuts + [total]
    return [hi - lo for lo, hi in zip(bounds, bounds[1:])]

def f(z, w, a, b):
    """Return ``z`` random, non-overlapping intervals laid out left to right.

    The span ``b - a`` is cut into ``z`` consecutive slots of at least ``w``
    integers each; the slack ``(b - a) - z*w`` is distributed at random by
    ``positive_integers_with_sum``. One interval is then drawn inside every
    slot, so no retry loop is needed and intervals cannot overlap.

    Each interval is ``[start, end]`` with an inclusive ``end`` and covers at
    least ``w`` integers (``end - start >= w - 1``).

    Raises:
        ValueError: if ``z`` intervals of width ``w`` do not fit, i.e.
            ``z*w > b - a``.
    """
    if z <= 0:
        # Nothing to place.
        return []
    slack = (b - a) - z * w
    if slack < 0:
        raise ValueError(
            f"cannot fit {z} intervals of width {w} between {a} and {b}"
        )
    slot_sizes = [extra + w for extra in positive_integers_with_sum(z, slack)]
    intervals = []
    slot_start = a
    for size in slot_sizes:
        slot_end = slot_start + size  # exclusive end of this slot
        first = random.randint(slot_start, slot_end - w)
        # The exclusive stop is at least w past `first` and never earlier
        # than w before the end of the slot.
        stop = random.randint(max(first + w, slot_end - w), slot_end)
        intervals.append([first, stop - 1])
        slot_start = slot_end
    return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]]]

>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
 [[18, 23], [39, 44], [77, 97]],
 [[12, 31], [37, 53], [83, 95]],
 [[13, 46], [62, 87], [94, 100]]]

# One list of observed interval lengths per interval position.
rv = [[], [], []]

# Sample many interval lists and record, per position, the distance
# between the two endpoints of the interval at that position.
for _ in range(100000):
    t = f(3, 5, 0, 100)
    for lengths, (left, right) in zip(rv, t):
        lengths.append(abs(right - left))

>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])

区间平均大小：
def list_to_pairs(l):
    """Split ``l`` into consecutive chunks of two items.

    Chunks are produced by slicing, so an odd-length input ends with a
    one-element chunk and an empty input yields an empty list.
    """
    pairs = []
    for start in range(0, len(l), 2):
        pairs.append(l[start:start + 2])
    return pairs

def f(z, w, a, b):
    """Return ``z`` sorted, non-overlapping random intervals in ``range(a, b)``.

    Rejection sampling: draw ``2*z`` distinct integers from ``range(a, b)``,
    sort them, pair them up as ``[start, end]`` and retry until every pair
    satisfies ``end - start >= w``.

    Raises:
        ValueError: if the request cannot be satisfied, i.e. ``range(a, b)``
            holds fewer than ``z * (max(w, 1) + 1)`` integers.
    """
    # Endpoints are distinct, so each interval spans at least max(w, 1) + 1
    # integers. Without this check an infeasible request would loop forever.
    if z > 0 and z * (max(w, 1) + 1) > b - a:
        raise ValueError(
            f"cannot fit {z} intervals of width >= {w} in range({a}, {b})"
        )
    # Always sample at least once; a placeholder start value would be
    # returned as-is whenever w <= 0.
    while True:
        points = sorted(random.sample(range(a, b), 2 * z))
        intervals = [[points[k], points[k + 1]] for k in range(0, 2 * z, 2)]
        if all(end - start >= w for start, end in intervals):
            return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
 [[10, 32], [61, 66], [72, 81]],
 [[2, 31], [63, 68], [77, 87]],
 [[5, 20], [34, 55], [58, 86]]]

def positive_integers_with_sum(n, total):
    """Return ``n`` random non-negative integers that add up to ``total``.

    Despite the name, individual parts may be zero. ``n - 1`` cut points are
    drawn independently from ``[0, total]`` (repeats allowed) and sorted; the
    parts are the gaps between consecutive cut points, with 0 and ``total``
    acting as the outer boundaries.

    Raises:
        ValueError: if ``n`` or ``total`` is negative, or if ``n`` is 0 while
            ``total`` is not.
    """
    if n < 0 or total < 0:
        raise ValueError("n and total must be non-negative")
    if n == 0:
        if total:
            raise ValueError("cannot split a non-zero total into zero parts")
        return []
    cuts = sorted(random.randint(0, total) for _ in range(n - 1))
    bounds = [0] + cuts + [total]
    return [hi - lo for lo, hi in zip(bounds, bounds[1:])]

def f(z, w, a, b):
    """Return ``z`` random, non-overlapping intervals laid out left to right.

    The span ``b - a`` is cut into ``z`` consecutive slots of at least ``w``
    integers each; the slack ``(b - a) - z*w`` is distributed at random by
    ``positive_integers_with_sum``. One interval is then drawn inside every
    slot, so no retry loop is needed and intervals cannot overlap.

    Each interval is ``[start, end]`` with an inclusive ``end`` and covers at
    least ``w`` integers (``end - start >= w - 1``).

    Raises:
        ValueError: if ``z`` intervals of width ``w`` do not fit, i.e.
            ``z*w > b - a``.
    """
    if z <= 0:
        # Nothing to place.
        return []
    slack = (b - a) - z * w
    if slack < 0:
        raise ValueError(
            f"cannot fit {z} intervals of width {w} between {a} and {b}"
        )
    slot_sizes = [extra + w for extra in positive_integers_with_sum(z, slack)]
    intervals = []
    slot_start = a
    for size in slot_sizes:
        slot_end = slot_start + size  # exclusive end of this slot
        first = random.randint(slot_start, slot_end - w)
        # The exclusive stop is at least w past `first` and never earlier
        # than w before the end of the slot.
        stop = random.randint(max(first + w, slot_end - w), slot_end)
        intervals.append([first, stop - 1])
        slot_start = slot_end
    return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]]]

>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
 [[18, 23], [39, 44], [77, 97]],
 [[12, 31], [37, 53], [83, 95]],
 [[13, 46], [62, 87], [94, 100]]]

# One list of observed interval lengths per interval position.
rv = [[], [], []]

# Sample many interval lists and record, per position, the distance
# between the two endpoints of the interval at that position.
for _ in range(100000):
    t = f(3, 5, 0, 100)
    for lengths, (left, right) in zip(rv, t):
        lengths.append(abs(right - left))

>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])

输出：
def list_to_pairs(l):
    """Split ``l`` into consecutive chunks of two items.

    Chunks are produced by slicing, so an odd-length input ends with a
    one-element chunk and an empty input yields an empty list.
    """
    pairs = []
    for start in range(0, len(l), 2):
        pairs.append(l[start:start + 2])
    return pairs

def f(z, w, a, b):
    """Return ``z`` sorted, non-overlapping random intervals in ``range(a, b)``.

    Rejection sampling: draw ``2*z`` distinct integers from ``range(a, b)``,
    sort them, pair them up as ``[start, end]`` and retry until every pair
    satisfies ``end - start >= w``.

    Raises:
        ValueError: if the request cannot be satisfied, i.e. ``range(a, b)``
            holds fewer than ``z * (max(w, 1) + 1)`` integers.
    """
    # Endpoints are distinct, so each interval spans at least max(w, 1) + 1
    # integers. Without this check an infeasible request would loop forever.
    if z > 0 and z * (max(w, 1) + 1) > b - a:
        raise ValueError(
            f"cannot fit {z} intervals of width >= {w} in range({a}, {b})"
        )
    # Always sample at least once; a placeholder start value would be
    # returned as-is whenever w <= 0.
    while True:
        points = sorted(random.sample(range(a, b), 2 * z))
        intervals = [[points[k], points[k + 1]] for k in range(0, 2 * z, 2)]
        if all(end - start >= w for start, end in intervals):
            return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
 [[10, 32], [61, 66], [72, 81]],
 [[2, 31], [63, 68], [77, 87]],
 [[5, 20], [34, 55], [58, 86]]]

def positive_integers_with_sum(n, total):
    """Return ``n`` random non-negative integers that add up to ``total``.

    Despite the name, individual parts may be zero. ``n - 1`` cut points are
    drawn independently from ``[0, total]`` (repeats allowed) and sorted; the
    parts are the gaps between consecutive cut points, with 0 and ``total``
    acting as the outer boundaries.

    Raises:
        ValueError: if ``n`` or ``total`` is negative, or if ``n`` is 0 while
            ``total`` is not.
    """
    if n < 0 or total < 0:
        raise ValueError("n and total must be non-negative")
    if n == 0:
        if total:
            raise ValueError("cannot split a non-zero total into zero parts")
        return []
    cuts = sorted(random.randint(0, total) for _ in range(n - 1))
    bounds = [0] + cuts + [total]
    return [hi - lo for lo, hi in zip(bounds, bounds[1:])]

def f(z, w, a, b):
    """Return ``z`` random, non-overlapping intervals laid out left to right.

    The span ``b - a`` is cut into ``z`` consecutive slots of at least ``w``
    integers each; the slack ``(b - a) - z*w`` is distributed at random by
    ``positive_integers_with_sum``. One interval is then drawn inside every
    slot, so no retry loop is needed and intervals cannot overlap.

    Each interval is ``[start, end]`` with an inclusive ``end`` and covers at
    least ``w`` integers (``end - start >= w - 1``).

    Raises:
        ValueError: if ``z`` intervals of width ``w`` do not fit, i.e.
            ``z*w > b - a``.
    """
    if z <= 0:
        # Nothing to place.
        return []
    slack = (b - a) - z * w
    if slack < 0:
        raise ValueError(
            f"cannot fit {z} intervals of width {w} between {a} and {b}"
        )
    slot_sizes = [extra + w for extra in positive_integers_with_sum(z, slack)]
    intervals = []
    slot_start = a
    for size in slot_sizes:
        slot_end = slot_start + size  # exclusive end of this slot
        first = random.randint(slot_start, slot_end - w)
        # The exclusive stop is at least w past `first` and never earlier
        # than w before the end of the slot.
        stop = random.randint(max(first + w, slot_end - w), slot_end)
        intervals.append([first, stop - 1])
        slot_start = slot_end
    return intervals

def get_lists(n, z, w, a, b):
    """Build ``n`` independent interval lists by calling ``f(z, w, a, b)`` ``n`` times."""
    lists = []
    for _ in range(n):
        lists.append(f(z, w, a, b))
    return lists

>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]],
 [[0, 4], [5, 9], [10, 14]]]

>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
 [[18, 23], [39, 44], [77, 97]],
 [[12, 31], [37, 53], [83, 95]],
 [[13, 46], [62, 87], [94, 100]]]

# One list of observed interval lengths per interval position.
rv = [[], [], []]

# Sample many interval lists and record, per position, the distance
# between the two endpoints of the interval at that position.
for _ in range(100000):
    t = f(3, 5, 0, 100)
    for lengths, (left, right) in zip(rv, t):
        lengths.append(abs(right - left))

>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])

以下是建议算法的示意图：
生成 z 个非负整数（即大于或等于 0 的整数），其总和为 ((b-a)+1) - z*w。我根据 Smith 和 Tromble 的“从单位单纯形均匀采样”编写了此算法。
将 w 加到以这种方式生成的每个数字上。这样就得到了 z 个连续候选区间的大小。
在每个候选区间内生成一个长度至少为 w 的随机子区间。这些子区间就是算法的实际输出。每个子区间需相应地平移 a 加上其候选区间的起点。
这是一个构建间隔的版本，因此间隔必须符合规范（因此它永远不需要“在幸运之前一直选择随机值”）：
先确定右边界（取值介于 w 和到下一个区间起点的可用空间之间），然后再加入随机变化，这样不是更好吗？因为你会知道在不引入重叠的前提下最多可以加多少。或者更好的做法是：先选定各区间的大小，再按顺序排列，并从未使用的空间中随机抽取作为区间之间的间隔。@ScottHunter 感谢您的建议。@SimonFink `all(x[1]-x[0] >= w for x in intervals)` 这样可以吗？@ScottHunter 我在我的答案中补充了一些内容。这是否保证a
始终会开始第一个间隔？或者a
+1会吗？这种方法陷入了我试图通过随机生成区间来避免的陷阱——区间的平均大小是倾斜的，因此较早的区间会更大。在使用OP中的示例进行的100000次测试中，生成间隔的平均大小为~[25.05,14.53,9.24]
。调整随机数生成器以适应口味。