Python 在一个范围内生成多组随机、不重叠的间隔
在特定的整数范围内,我想生成Python 在一个范围内生成多组随机、不重叠的间隔,python,python-3.x,algorithm,random,intervals,Python,Python 3.x,Algorithm,Random,Intervals,在特定的整数范围内,我想生成n列表,每个列表由z非重叠的随机间隔组成,最小间隔宽度为w。应在单个此类列表中理解非重叠条件 a=0、b=100、n=4、z=3、w=5的示例: 1. [ [1, 17], [57, 83], [89, 98] ] 2. [ [5, 23], [42, 49], [60, 78] ] 3. [ [70, 76], [80, 89], [93, 99] ] 4. [ [20, 62], [67, 81], [82, 93] ] 目前,我使用numpy.linspace
n
列表,每个列表由z
非重叠的随机间隔组成,最小间隔宽度为w
。应在单个此类列表中理解非重叠条件
a=0、b=100、n=4、z=3、w=5的示例:
1. [ [1, 17], [57, 83], [89, 98] ]
2. [ [5, 23], [42, 49], [60, 78] ]
3. [ [70, 76], [80, 89], [93, 99] ]
4. [ [20, 62], [67, 81], [82, 93] ]
目前,我使用numpy.linspace
返回[a,b]
左间隔边界的间隔,然后为每个值引入一个小的随机变化。
在两个这样的边界内,我尝试放置正确的间隔边界,同时遵守最小宽度要求。然而,我的方法在计算上非常昂贵
在Python中实现我的目标最有效的方法是什么?一组时间间隔的一个变体(其他变体以相同的方式生成)。简单但不是很有效:
1.在a和b之间生成一组z值。在你的例子中,它是[x1,x2,x3](按升序排序)
2.将其转换为区间列表:[[x1,x1],[x2,x2],[x3,x3]]
3.依次遍历每个间隔:如果其下边界恰好比上一个间隔的上边界大1,则只能增大其上边界;否则,如果其上边界恰好比下一个间隔的下边界小1,则只能减小其下边界。如果这两个条件都不满足,则可以向任意一侧扩展该间隔。如果两个条件同时满足——很遗憾,运气不好,请从第1步重试。
4.重复步骤3,直到所有间隔最小宽度为W,并且在方法1-原始随机生成后的一些(随机数)次
这是一种低效但简单的方法——从range(a, b)
中选取z*2
随机整数,对它们进行排序、配对,并检查间隔是否都大于或等于w
。重复此n
次
请注意,当z*w
接近len(range(a, b))
时,这种方法会很低效。我考虑过添加一个辅助函数来随机生成第n个间隔,同时为其余z-n个间隔留出空间——即从range(a, b-w*(z-n))中选择索引——但这样会遇到一个问题:先选出的间隔会偏向于更长。
代码:
def list_to_pairs(l):
    """Split a flat sequence into consecutive two-element chunks.

    ``[x0, x1, x2, x3]`` becomes ``[[x0, x1], [x2, x3]]``. When the input
    holds an odd number of items, the final chunk contains the single
    leftover item.
    """
    return [l[i:i + 2] for i in range(0, len(l), 2)]
def f(z, w, a, b):
    """Draw ``z`` sorted, non-overlapping intervals by rejection sampling.

    ``2 * z`` distinct integers are sampled from ``range(a, b)``, sorted and
    paired up as ``[start, end]``; the draw is repeated until every pair
    satisfies ``end - start >= w``.

    Raises:
        ValueError: if ``z`` is negative or no valid arrangement exists.
    """
    # Sampled endpoints are distinct, so each interval spans at least 1 and
    # neighbouring intervals are at least 1 apart. Fitting z intervals of
    # width >= w therefore needs z * (max(w, 1) + 1) values in range(a, b).
    # Without this check an infeasible request would retry forever.
    min_width = max(w, 1)
    if z < 0 or z * (min_width + 1) > max(b - a, 0):
        raise ValueError(
            "cannot fit %d intervals of width >= %d in range(%d, %d)"
            % (z, w, a, b)
        )
    # Always sample at least once: a placeholder start value would wrongly
    # pass the width check (and be returned) whenever w <= 0.
    while True:
        endpoints = sorted(random.sample(range(a, b), 2 * z))
        intervals = list_to_pairs(endpoints)
        if all(hi - lo >= w for lo, hi in intervals):
            return intervals
def get_lists(n, z, w, a, b):
    """Return ``n`` interval lists, each produced by one call to ``f(z, w, a, b)``."""
    return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
[[10, 32], [61, 66], [72, 81]],
[[2, 31], [63, 68], [77, 87]],
[[5, 20], [34, 55], [58, 86]]]
def positive_integers_with_sum(n, total):
    """Return ``n`` random non-negative integers whose sum is ``total``.

    Despite the name, individual parts may be zero: ``n - 1`` cut points are
    drawn with ``random.randint(0, total)``, sorted, and the gaps between
    consecutive cut points (with 0 and ``total`` as the outer bounds) are
    returned in order.

    Raises:
        ValueError: if ``n < 1`` or ``total < 0``.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    if total < 0:
        raise ValueError("total must be non-negative")
    cuts = sorted(random.randint(0, total) for _ in range(n - 1))
    bounds = [0, *cuts, total]
    return [hi - lo for lo, hi in zip(bounds, bounds[1:])]
def f(z, w, a, b):
    """Construct ``z`` non-overlapping intervals without retrying.

    The span ``b - a`` is split into ``z`` consecutive slots of at least
    ``w`` integers each (the slack ``(b - a) - z * w`` is distributed by
    ``positive_integers_with_sum``). One sub-interval is then drawn inside
    each slot.

    Each result is an inclusive pair ``[start, end]`` covering at least
    ``w`` integers, i.e. ``end - start >= w - 1``.

    Raises:
        ValueError: if ``z`` or ``w`` is negative, or ``z * w > b - a``.
    """
    if z < 0 or w < 0:
        raise ValueError("z and w must be non-negative")
    if z * w > b - a:
        raise ValueError(
            "cannot fit %d intervals of %d integers between %d and %d"
            % (z, w, a, b)
        )
    if z == 0:
        # Nothing to place; the slot helper would still yield one slot.
        return []

    rv = []
    start = a
    for extra in positive_integers_with_sum(z, (b - a) - z * w):
        end = start + extra + w  # exclusive end of this slot
        i_start = random.randint(start, end - w)
        # The exclusive end falls within the last w positions of the slot
        # and leaves at least w integers after i_start.
        i_end = random.randint(max(i_start + w, end - w), end)
        rv.append([i_start, i_end - 1])
        start = end
    return rv
def get_lists(n, z, w, a, b):
    """Return ``n`` interval lists, each produced by one call to ``f(z, w, a, b)``."""
    return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]]]
>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
[[18, 23], [39, 44], [77, 97]],
[[12, 31], [37, 53], [83, 95]],
[[13, 46], [62, 87], [94, 100]]]
# Empirical check: record the size of the first, second and third interval
# over many runs so the per-position averages can be compared.
rv = [[], [], []]
for _ in range(100000):
    t = f(3, 5, 0, 100)
    # Pair each per-position bucket with its interval; no index variable is
    # needed, so the outer loop variable is no longer shadowed.
    for sizes, (lo, hi) in zip(rv, t):
        sizes.append(abs(hi - lo))
>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])
输出:
def list_to_pairs(l):
return [l[i:i+2] for i in range(0, len(l), 2)]
def f(z, w, a, b):
intervals = [(0,0)]
while not all(x[1]-x[0] >= w for x in intervals):
intervals = list_to_pairs(sorted(random.sample(range(a, b), z*2)))
return intervals
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
[[10, 32], [61, 66], [72, 81]],
[[2, 31], [63, 68], [77, 87]],
[[5, 20], [34, 55], [58, 86]]]
def positive_integers_with_sum(n, total):
ls = [0]
rv = []
while len(ls) < n:
c = random.randint(0, total)
ls.append(c)
ls = sorted(ls)
ls.append(total)
for i in range(1, len(ls)):
rv.append(ls[i] - ls[i-1])
return rv
def f(z, w, a, b):
rv = []
indices = [x+w for x in positive_integers_with_sum(z, (b-a)-z*w)]
start = a
for i in indices:
i_start = random.randint(start, i+start-w)
i_end = random.randint(max(i_start+w, i+start-w), i+start)
rv.append([i_start, i_end - 1])
start+=i
return rv
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]]]
>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
[[18, 23], [39, 44], [77, 97]],
[[12, 31], [37, 53], [83, 95]],
[[13, 46], [62, 87], [94, 100]]]
rv = [[],[],[]]
for i in range(100000):
t = f(3,5,0,100)
for i in range(3):
rv[i].append(abs(t[i][1] - t[i][0]))
>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])
方法2
@Peter O.概述了一种不依赖于随机选取间隔的方法,我在下面用一些小的逻辑变化对其进行了编码
代码:
def list_to_pairs(l):
return [l[i:i+2] for i in range(0, len(l), 2)]
def f(z, w, a, b):
intervals = [(0,0)]
while not all(x[1]-x[0] >= w for x in intervals):
intervals = list_to_pairs(sorted(random.sample(range(a, b), z*2)))
return intervals
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
[[10, 32], [61, 66], [72, 81]],
[[2, 31], [63, 68], [77, 87]],
[[5, 20], [34, 55], [58, 86]]]
def positive_integers_with_sum(n, total):
ls = [0]
rv = []
while len(ls) < n:
c = random.randint(0, total)
ls.append(c)
ls = sorted(ls)
ls.append(total)
for i in range(1, len(ls)):
rv.append(ls[i] - ls[i-1])
return rv
def f(z, w, a, b):
rv = []
indices = [x+w for x in positive_integers_with_sum(z, (b-a)-z*w)]
start = a
for i in indices:
i_start = random.randint(start, i+start-w)
i_end = random.randint(max(i_start+w, i+start-w), i+start)
rv.append([i_start, i_end - 1])
start+=i
return rv
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]]]
>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
[[18, 23], [39, 44], [77, 97]],
[[12, 31], [37, 53], [83, 95]],
[[13, 46], [62, 87], [94, 100]]]
rv = [[],[],[]]
for i in range(100000):
t = f(3,5,0,100)
for i in range(3):
rv[i].append(abs(t[i][1] - t[i][0]))
>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])
区间平均大小:
def list_to_pairs(l):
return [l[i:i+2] for i in range(0, len(l), 2)]
def f(z, w, a, b):
intervals = [(0,0)]
while not all(x[1]-x[0] >= w for x in intervals):
intervals = list_to_pairs(sorted(random.sample(range(a, b), z*2)))
return intervals
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
[[10, 32], [61, 66], [72, 81]],
[[2, 31], [63, 68], [77, 87]],
[[5, 20], [34, 55], [58, 86]]]
def positive_integers_with_sum(n, total):
ls = [0]
rv = []
while len(ls) < n:
c = random.randint(0, total)
ls.append(c)
ls = sorted(ls)
ls.append(total)
for i in range(1, len(ls)):
rv.append(ls[i] - ls[i-1])
return rv
def f(z, w, a, b):
rv = []
indices = [x+w for x in positive_integers_with_sum(z, (b-a)-z*w)]
start = a
for i in indices:
i_start = random.randint(start, i+start-w)
i_end = random.randint(max(i_start+w, i+start-w), i+start)
rv.append([i_start, i_end - 1])
start+=i
return rv
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]]]
>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
[[18, 23], [39, 44], [77, 97]],
[[12, 31], [37, 53], [83, 95]],
[[13, 46], [62, 87], [94, 100]]]
rv = [[],[],[]]
for i in range(100000):
t = f(3,5,0,100)
for i in range(3):
rv[i].append(abs(t[i][1] - t[i][0]))
>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])
输出:
def list_to_pairs(l):
return [l[i:i+2] for i in range(0, len(l), 2)]
def f(z, w, a, b):
intervals = [(0,0)]
while not all(x[1]-x[0] >= w for x in intervals):
intervals = list_to_pairs(sorted(random.sample(range(a, b), z*2)))
return intervals
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(4, 3, 5, 0, 100)
[[[0, 17], [22, 46], [62, 98]],
[[10, 32], [61, 66], [72, 81]],
[[2, 31], [63, 68], [77, 87]],
[[5, 20], [34, 55], [58, 86]]]
def positive_integers_with_sum(n, total):
ls = [0]
rv = []
while len(ls) < n:
c = random.randint(0, total)
ls.append(c)
ls = sorted(ls)
ls.append(total)
for i in range(1, len(ls)):
rv.append(ls[i] - ls[i-1])
return rv
def f(z, w, a, b):
rv = []
indices = [x+w for x in positive_integers_with_sum(z, (b-a)-z*w)]
start = a
for i in indices:
i_start = random.randint(start, i+start-w)
i_end = random.randint(max(i_start+w, i+start-w), i+start)
rv.append([i_start, i_end - 1])
start+=i
return rv
def get_lists(n, z, w, a, b):
return [f(z, w, a, b) for _ in range(n)]
>>> get_lists(5, 3, 5, 0, 15)
[[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]],
[[0, 4], [5, 9], [10, 14]]]
>>> get_lists(4, 3, 5, 0, 100)
[[[45, 72], [74, 79], [92, 97]],
[[18, 23], [39, 44], [77, 97]],
[[12, 31], [37, 53], [83, 95]],
[[13, 46], [62, 87], [94, 100]]]
rv = [[],[],[]]
for i in range(100000):
t = f(3,5,0,100)
for i in range(3):
rv[i].append(abs(t[i][1] - t[i][0]))
>>> np.mean(rv, axis=1)
array([16.10771, 16.35467, 16.21329])
以下是建议算法的示意图:
生成z
非负整数(整数0或更大),其总和为((b-a)+1)-z*w
。我根据Smith和Tromble的“从单位单纯形均匀采样”编写了此算法
将w
添加到以这种方式生成的每个数字。这将导致z
连续候选区间的大小
在每个候选区间内生成一个最小长度w
的随机子区间。这些子区间是算法的实际输出。每个子间隔相应地移动a
及其候选间隔的开始
这是一个构建间隔的版本,因此间隔必须符合规范(因此它永远不需要“在幸运之前一直选择随机值”):
定义正确的边界(在w
和到下一个间隔起点的空间之间),然后再添加随机变化,不是更好吗?这样你就知道在不引入重叠的前提下最多能加多少。或者更好的做法:先选定各个间隔的大小,按顺序排列,再从未使用的空间中随机抽取它们之间的空隙。@ScottHunter 感谢您的建议。@SimonFink all(x[1]-x[0] >= w for x in intervals)
这样可以吗?@ScottHunter 我在我的答案中补充了一点内容。这是否保证a
始终会开始第一个间隔?或者a
+1会吗?这种方法陷入了我试图通过随机生成区间来避免的陷阱——区间的平均大小是倾斜的,因此较早的区间会更大。在使用OP中的示例进行的100000次测试中,生成间隔的平均大小为~[25.05,14.53,9.24]
。调整随机数生成器以适应口味。