Python列表筛选:从列表列表中删除子集
使用Python如何通过有序子集匹配减少列表列表 在这个问题的上下文中,如果Python列表筛选:从列表列表中删除子集,python,list,Python,List,使用Python如何通过有序子集匹配减少列表列表 在这个问题的上下文中,如果M包含L的所有成员,则列表L是列表M的子集,且顺序相同。例如,列表[1,2]是列表[1,2,3]的子集,但不是列表[2,1,3]的子集 输入示例: 预期结果: 其他例子: L=[[1,2,3,4,5,6,7],[1,2,5,6]]-不减少 L=[[1,2,3,4,5,6,7],[1,2,3],[1,2,4,8]-是 L=[[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]-不减少 (很抱歉给不正确的数据集造成
在这个问题的上下文中,如果M包含L的所有成员,且顺序相同,则列表L是列表M的子集。例如,列表[1,2]是列表[1,2,3]的子集,但不是列表[2,1,3]的子集
输入示例:
预期结果:
其他例子:
L=[[1,2,3,4,5,6,7],[1,2,5,6]] - 不减少
L=[[1,2,3,4,5,6,7],[1,2,3],[1,2,4,8]] - 减少
L=[[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]] - 不减少
(很抱歉给不正确的数据集造成混乱。)编辑:我真的需要提高我的阅读理解能力。以下是实际问题的答案。它利用了一个事实,即“A是B的超序列”意味着“len(A)>len(B)或A==B”
如果您还需要保留序列的原始顺序,则find_supersequences()
函数需要跟踪序列的位置,并随后对输出进行排序
list0 = [[1, 2, 4, 8], [1, 2, 4, 5, 6], [1, 2, 3], [2, 3, 21], [1, 2, 3, 4], [1, 2, 3, 4, 5, 6, 7]]

# Filter list0 in place: drop every list whose items all occur, in the same
# order (gaps allowed), inside some other, different list.  The outer loop
# walks a snapshot so removing from list0 cannot make it skip entries.
for list1 in list0[:]:
    for list2 in list0:
        if list2 == list1:
            # Equal lists never eliminate each other.
            continue
        # `item in remaining` consumes the iterator up to the match, so each
        # following item has to be found after the previous one.  An empty
        # list1 is trivially contained (no IndexError).
        remaining = iter(list2)
        if all(item in remaining for item in list1):
            list0.remove(list1)
            # list1 is gone and list0 was just mutated: stop scanning it.
            break
这将使用列表0的副本就地筛选列表0。如果预期结果与原始列表大小大致相同(即只有少数“子集”要删除),这种方法很合适
如果预期结果很小,而原始列表很大,那么您可能更喜欢下面这个更节省内存的版本,因为它不会复制原始列表
list0 = [[1, 2, 4, 8], [1, 2, 4, 5, 6], [1, 2, 3], [2, 3, 21], [1, 2, 3, 4], [1, 2, 3, 4, 5, 6, 7]]


def _occurs_in_order(short, long):
    """Return True if every item of short is found in long, in the same order."""
    # Membership tests on an iterator consume it, which enforces the ordering.
    remaining = iter(long)
    return all(item in remaining for item in short)


# Build a new list instead of copying and mutating list0: keep only the lists
# that are not contained, in order, in some other (different) list.
result = []
for list1 in list0:
    for list2 in list0:
        if list2 != list1 and _occurs_in_order(list1, list2):
            break
    else:
        # No other list contains list1.
        result.append(list1)
这似乎有效:
original = [[1, 2, 4, 8], [1, 2, 4, 5, 6], [1, 2, 3], [2, 3, 21], [1, 2, 3, 4], [1, 2, 3, 4, 5, 6, 7]]
target = [[1, 2, 4, 8], [2, 3, 21], [1, 2, 3, 4, 5, 6, 7]]


class SetAndList:
    """Pair a list with the set of its items and a "still unique" flag.

    Attributes set by __init__:
      list     -- the list that was passed in (not copied)
      set      -- set built from that list's items
      isUnique -- True until compare() sees a list covering all of our items
    """

    def __init__(self, aList):
        self.list = aList
        self.set = set(aList)
        self.isUnique = True

    def compare(self, aList):
        """Clear isUnique when every item of this list also occurs in aList."""
        if self.set.issubset(set(aList)):
            # Debug aid: print("%s superseded by %s" % (self.list, aList))
            self.isUnique = False


def listReduce(lists):
    """Return the lists whose item set is not covered by another list's items.

    The comparison is set based, so item order and repeats inside a list are
    ignored.  When two lists have equal item sets the later one is kept.
    Surviving lists are returned in their input order.
    """
    temp = []
    for current in lists:
        entry = SetAndList(current)
        for earlier in temp:
            # An earlier list is superseded by a later list that covers it...
            earlier.compare(current)
            # ...and a later list is superseded by an earlier strict superset.
            # Without this check the result depended on the input order.
            if entry.set < earlier.set:
                entry.isUnique = False
        temp.append(entry)
    return [entry.list for entry in temp if entry.isUnique]


# Print the computed result and the expected one for visual comparison.
print(listReduce(original))
print(target)
这将打印计算出的列表和目标以进行视觉比较
在compare方法中取消对打印行的注释,以查看各种列表是如何被替换的
使用Python 2.6.2测试通过。我实现了一个不同的is_subseq,因为您的实现不认为[1,2,4,5,6]是[1,2,3,4,5,6,7]的子序列(而且速度非常慢)。我提出的解决方案如下所示:
def is_subseq(a, b):
    """Return True if the items of a occur in b in the same order.

    The items need not be adjacent in b.  Each item of b can satisfy at most
    one item of a, so is_subseq([1, 1], [1, 2]) is False.
    """
    if len(a) > len(b):
        return False
    # Membership tests on an iterator consume it up to the match, so every
    # following item is searched strictly after the previous match.
    remaining = iter(b)
    return all(el in remaining for el in a)


def filter_partial_matches(sets):
    """Return the entries of sets that are not a subsequence of a different entry.

    Entries that compare equal are never tested against each other, so
    duplicates are all kept unless some other entry contains them.
    """
    return [s for s in sets
            if not any(is_subseq(s, ss) for ss in sets if s != ss)]
希望有帮助 此代码应该具有相当高的内存效率。除了存储初始列表外,此代码使用的额外内存可以忽略不计(不会创建列表的临时集或副本)
def是_子集(针、草堆):
“”“检查针是否按O(n)中草堆的子集排序”
如果len(干草堆)>> [[1, 2, 4, 8], [2, 3, 21], [1, 2, 3, 4, 5, 6, 7]]
还有,为了好玩,一行:
def filter_list(L):
    """Return the lists in L whose item set is not covered by another list.

    Set based: order and repeats inside a list are ignored.  A list is dropped
    when its items form a strict subset of another list's items, or when an
    earlier list has exactly the same item set (so one copy of a duplicate is
    kept instead of every copy being dropped).
    """
    # Build each set once instead of once per compared pair.
    item_sets = [set(x) for x in L]
    kept = []
    for i, x_set in enumerate(item_sets):
        superseded = any(
            x_set < y_set or (x_set == y_set and j < i)
            for j, y_set in enumerate(item_sets)
            if j != i
        )
        if not superseded:
            kept.append(L[i])
    return kept
def过滤器列表(L):
如果列表不是任何其他列表的子集,那么它就是一个超级列表。如果列表中的每个元素都可以按顺序在另一个列表中找到,那么它就是另一个列表的子集
这是我的代码:
def is_sublist_of_any_list(cand, lists):
    """Return True if cand's items occur, in order, in any list of lists.

    Items need not be adjacent.  Lists shorter than cand are skipped.
    """
    # Compare candidate to a single list
    def is_sublist_of_list(cand, target):
        position = 0
        try:
            for item in cand:
                # Resume the search right after the previous match.
                position = target.index(item, position) + 1
        except ValueError:
            # list.index() raises ValueError when the item is not found.
            return False
        return True

    # See if candidate matches any other list
    return any(is_sublist_of_list(cand, target)
               for target in lists if len(cand) <= len(target))


def super_lists(lists):
    """Return the lists that are not an ordered sub-list of another entry.

    Each candidate is compared to all other entries.  When several entries are
    equal, the first copy is not compared against the later copies, so one
    copy survives unless a different list contains it.
    """
    result = []
    for i, cand in enumerate(lists):
        others = [other for j, other in enumerate(lists)
                  if j != i and not (j > i and other == cand)]
        if not is_sublist_of_any_list(cand, others):
            result.append(cand)
    return result


if __name__ == '__main__':
    lists = [[1, 2, 4, 8], [1, 2, 4, 5, 6], [1, 2, 3], [2, 3, 21], [1, 2, 3, 4], [1, 2, 3, 4, 5, 6, 7]]
    superlists = super_lists(lists)
    print(superlists)
编辑:为以后的数据集编辑结果
>>> lists = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [1], [1, 2, 3, 4], [1, 2], [17,
18, 19, 22, 41, 48], [2, 3], [1, 2, 3], [50, 69], [1, 2, 3], [2, 3, 21], [1, 2,
3], [1, 2, 4, 8], [1, 2, 4, 5, 6]]
>>> superlists = super_lists(lists)
>>> expected = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [17, 18, 19, 22, 41, 48], [5
0, 69], [2, 3, 21], [1, 2, 4, 8]]
>>> assert(superlists == expected)
>>> print superlists
[[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [17, 18, 19, 22, 41, 48], [50, 69], [2, 3,
21], [1, 2, 4, 8]]
这可以简化,但:
l = [[1, 2, 4, 8], [1, 2, 4, 5, 6], [1, 2, 3], [2, 3, 21], [1, 2, 3, 4], [1, 2, 3, 4, 5, 6, 7]]
# Remove from the copy l2 so the list being iterated (l) is never mutated.
l2 = l[:]
# Build each set once rather than once per compared pair.
item_sets = [set(x) for x in l]
for m, m_set in zip(l, item_sets):
    for n, n_set in zip(l, item_sets):
        # Set based test: item order is ignored.  `m != n` stops a list (or an
        # equal copy of it) from eliminating itself.
        if m_set.issubset(n_set) and m != n:
            l2.remove(m)
            break
print(l2)
[[1, 2, 4, 8], [2, 3, 21], [1, 2, 3, 4, 5, 6, 7]]
新测试用例后的精确答案:
original = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [1], [1, 2, 3, 4], [1, 2], [17, 18, 19, 22, 41, 48], [2, 3], [1, 2, 3], [50, 69], [1, 2, 3], [2, 3, 21], [1, 2, 3], [1, 2, 4, 8], [1, 2, 4, 5, 6]]


class SetAndList:
    """Pair a list with the set of its items and a "still unique" flag.

    Attributes set by __init__:
      list     -- the list that was passed in (not copied)
      set      -- set built from that list's items
      isUnique -- True until compare() sees an entry covering all of our items
    """

    def __init__(self, aList):
        self.list = aList
        self.set = set(aList)
        self.isUnique = True

    def compare(self, other):
        """Clear isUnique when other (a SetAndList) contains all of our items."""
        if self.set.issubset(other.set):
            # Debug aid: print("%s superseded by %s" % (self.list, other.list))
            self.isUnique = False


def listReduce(lists):
    """Return the lists whose item set is not covered by another list's items.

    Set based: order and repeats inside a list are ignored.  Works in either
    input order (subset before or after its superset).  When two lists have
    equal item sets only the first one is kept; previously both were dropped.
    """
    temp = []
    for current in lists:
        entry = SetAndList(current)
        for earlier in temp:
            if entry.set == earlier.set:
                # Duplicate item set: keep the earlier entry, drop this one.
                entry.isUnique = False
            else:
                earlier.compare(entry)
                entry.compare(earlier)
        temp.append(entry)
        # Prune superseded entries right away so later lists are compared
        # against fewer candidates.
        temp = [kept for kept in temp if kept.isUnique]
    return [kept.list for kept in temp]


print(listReduce(original))
您没有给出所需的输出,但我猜这是对的,因为输出中没有出现[1,2,3]
。感谢所有建议解决方案的人,以及处理我有时出错的数据集的人。使用@hughdbrown解决方案,我将其修改为我想要的:
修改是在目标上使用滑动窗口,以确保找到子集序列。我认为我应该使用比“Set”更合适的词来描述我的问题
def is_sublist_of_any_list(cand, lists):
    """Return True if cand occurs as a contiguous run inside any list of lists.

    Lists shorter than cand are skipped.  An empty cand is reported as not
    being a sub-list of anything, as before.
    """
    # Compare candidate to a single list
    def is_sublist_of_list(cand, target):
        if not cand:
            return False
        width = len(cand)
        # Slide a window of len(cand) items over target.  The original sliced
        # target[start:len(cand)], which could only ever match at start == 0.
        return any(target[start:start + width] == cand
                   for start in range(len(target) - width + 1))

    # See if candidate matches any other list
    return any(is_sublist_of_list(cand, target)
               for target in lists if len(cand) <= len(target))


def super_lists(lists):
    """Return the lists that are not a contiguous sub-list of another entry.

    Each candidate is compared to all other entries.  When several entries are
    equal, the first copy is not compared against the later copies, so one
    copy survives unless a different list contains it.
    """
    result = []
    for i, cand in enumerate(lists):
        others = [other for j, other in enumerate(lists)
                  if j != i and not (j > i and other == cand)]
        if not is_sublist_of_any_list(cand, others):
            result.append(cand)
    return result


lists = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [1], [1, 2, 3, 4], [1, 2], [17, 18, 19, 22, 41, 48], [2, 3], [1, 2, 3], [50, 69], [1, 2, 3], [2, 3, 21], [1, 2, 3], [1, 2, 4, 8], [1, 2, 4, 5, 6]]
expect = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [17, 18, 19, 22, 41, 48], [50, 69], [2, 3, 21], [1, 2, 4, 8], [1, 2, 4, 5, 6]]


def test():
    """Run super_lists on the sample data, print input/output and check it."""
    out = super_lists(list(lists))
    # Formatted so the text is the same under Python 2 and Python 3.
    print("%s %s" % ("In : ", lists))
    print("%s %s" % ("Out : ", out))
    assert (out == expect)
因此,您真正想知道的是,一个列表是否可以说是另一个列表的“子字符串”,即所有匹配元素都是连续的。下面的代码将候选列表和目标列表转换为逗号分隔的字符串,并进行子字符串比较,以查看候选列表是否出现在目标列表中
def is_sublist_of_any_list(cand, lists):
    """Return True if cand occurs as a contiguous run inside any list of lists.

    Both sides are rendered as ",a,b,c," strings and compared with a substring
    test, so items are matched by their str() form.  Lists with fewer items
    than cand are skipped.
    """
    def comma_list(l):
        # Leading and trailing commas keep "1" from matching inside "11".
        return "," + ",".join(str(x) for x in l) + ","

    # Take the item count before cand is turned into a string: the original
    # compared the string's length with the target's item count and therefore
    # skipped targets that did contain the candidate.
    cand_len = len(cand)
    cand_text = comma_list(cand)
    return any(cand_text in comma_list(target)
               for target in lists if cand_len <= len(target))


def super_lists(lists):
    """Return the lists that are not a contiguous sub-list of another entry.

    When several entries are equal, the first copy is not compared against the
    later copies, so one copy survives unless a different list contains it.
    """
    result = []
    for i, cand in enumerate(lists):
        others = [other for j, other in enumerate(lists)
                  if j != i and not (j > i and other == cand)]
        if not is_sublist_of_any_list(cand, others):
            result.append(cand)
    return result
def是任何列表(cand,列表)的子列表:
定义逗号列表(l):
返回“,”+“,”。连接(str(x)表示l中的x)+“,”
cand=逗号列表(cand)
如果len(cand)为列表中的目标返回any(cand)(以逗号表示)如果len(cand)为列表中的目标,则返回any(cand)(以逗号表示)超集列表是什么?它是不作为另一个集合的子集出现的任何集合?结果中不应该有[1,2,4,5,6]吗?否,[1,2,4,5,6]是[1,2,3,4,5,6]的“子集”,根据问题定义。我认为您需要生成一组确定的测试用例-我很乐意为它们编写代码。似乎我的两个答案都不完全正确。我不明白。[1,2,4,5,6]在一个测试数据集中被省略,因为[1,2,3,4,5,6,7],但在这个测试数据中没有?[[1,2,3,4,5,6,7],[1,2,4,5,6]]我是否读错了“No-reduce”注释?这不符合列表顺序,例如,如果给出[[1,2,3,4],[2,4,3],[3,4,5]]结果是[[1,2,3,4],[2,4,3]],我希望它返回初始输入。@Triptych:他在原始问题中没有说明。我确实说明了顺序很重要,“必须尊重顺序”。但这不是出价交易。谢谢你提供了可能的解决方案。@Oli_UK:如果顺序不重要,那么使用集合显然是赢家。迭代解决方案将是一个错误。你能澄清这一点吗?你的第二个解决方案包括[1,2,4,5,6],而它不应该。是_superseq()似乎假设元素必须是连续的才能取消列表的资格。这一行是个好主意:“index=haystack.index(element,index)”。相反,我每次都会缩短列表。不过,我猜这段代码会说[1,1,1,1,1,1]是
[[1, 2, 4, 8], [2, 3, 21], [1, 2, 3, 4, 5, 6, 7]]
>>> lists = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [1], [1, 2, 3, 4], [1, 2], [17,
18, 19, 22, 41, 48], [2, 3], [1, 2, 3], [50, 69], [1, 2, 3], [2, 3, 21], [1, 2,
3], [1, 2, 4, 8], [1, 2, 4, 5, 6]]
>>> superlists = super_lists(lists)
>>> expected = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [17, 18, 19, 22, 41, 48], [5
0, 69], [2, 3, 21], [1, 2, 4, 8]]
>>> assert(superlists == expected)
>>> print superlists
[[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [17, 18, 19, 22, 41, 48], [50, 69], [2, 3,
21], [1, 2, 4, 8]]
l = [[1, 2, 4, 8], [1, 2, 4, 5, 6], [1, 2, 3], [2, 3, 21], [1, 2, 3, 4], [1, 2, 3, 4, 5, 6, 7]]
# Remove from the copy l2 so the list being iterated (l) is never mutated.
l2 = l[:]
# Build each set once rather than once per compared pair.
item_sets = [set(x) for x in l]
for m, m_set in zip(l, item_sets):
    for n, n_set in zip(l, item_sets):
        # Set based test: item order is ignored.  `m != n` stops a list (or an
        # equal copy of it) from eliminating itself.
        if m_set.issubset(n_set) and m != n:
            l2.remove(m)
            break
print(l2)
[[1, 2, 4, 8], [2, 3, 21], [1, 2, 3, 4, 5, 6, 7]]
original = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [1], [1, 2, 3, 4], [1, 2], [17, 18, 19, 22, 41, 48], [2, 3], [1, 2, 3], [50, 69], [1, 2, 3], [2, 3, 21], [1, 2, 3], [1, 2, 4, 8], [1, 2, 4, 5, 6]]


class SetAndList:
    """Pair a list with the set of its items and a "still unique" flag.

    Attributes set by __init__:
      list     -- the list that was passed in (not copied)
      set      -- set built from that list's items
      isUnique -- True until compare() sees an entry covering all of our items
    """

    def __init__(self, aList):
        self.list = aList
        self.set = set(aList)
        self.isUnique = True

    def compare(self, other):
        """Clear isUnique when other (a SetAndList) contains all of our items."""
        if self.set.issubset(other.set):
            # Debug aid: print("%s superseded by %s" % (self.list, other.list))
            self.isUnique = False


def listReduce(lists):
    """Return the lists whose item set is not covered by another list's items.

    Set based: order and repeats inside a list are ignored.  Works in either
    input order (subset before or after its superset).  When two lists have
    equal item sets only the first one is kept; previously both were dropped.
    """
    temp = []
    for current in lists:
        entry = SetAndList(current)
        for earlier in temp:
            if entry.set == earlier.set:
                # Duplicate item set: keep the earlier entry, drop this one.
                entry.isUnique = False
            else:
                earlier.compare(entry)
                entry.compare(earlier)
        temp.append(entry)
        # Prune superseded entries right away so later lists are compared
        # against fewer candidates.
        temp = [kept for kept in temp if kept.isUnique]
    return [kept.list for kept in temp]


print(listReduce(original))
def is_sublist_of_any_list(cand, lists):
    """Return True if cand occurs as a contiguous run inside any list of lists.

    Lists shorter than cand are skipped.  An empty cand is reported as not
    being a sub-list of anything, as before.
    """
    # Compare candidate to a single list
    def is_sublist_of_list(cand, target):
        if not cand:
            return False
        width = len(cand)
        # Slide a window of len(cand) items over target.  The original sliced
        # target[start:len(cand)], which could only ever match at start == 0.
        return any(target[start:start + width] == cand
                   for start in range(len(target) - width + 1))

    # See if candidate matches any other list
    return any(is_sublist_of_list(cand, target)
               for target in lists if len(cand) <= len(target))


def super_lists(lists):
    """Return the lists that are not a contiguous sub-list of another entry.

    Each candidate is compared to all other entries.  When several entries are
    equal, the first copy is not compared against the later copies, so one
    copy survives unless a different list contains it.
    """
    result = []
    for i, cand in enumerate(lists):
        others = [other for j, other in enumerate(lists)
                  if j != i and not (j > i and other == cand)]
        if not is_sublist_of_any_list(cand, others):
            result.append(cand)
    return result


lists = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [1], [1, 2, 3, 4], [1, 2], [17, 18, 19, 22, 41, 48], [2, 3], [1, 2, 3], [50, 69], [1, 2, 3], [2, 3, 21], [1, 2, 3], [1, 2, 4, 8], [1, 2, 4, 5, 6]]
expect = [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [17, 18, 19, 22, 41, 48], [50, 69], [2, 3, 21], [1, 2, 4, 8], [1, 2, 4, 5, 6]]


def test():
    """Run super_lists on the sample data, print input/output and check it."""
    out = super_lists(list(lists))
    # Formatted so the text is the same under Python 2 and Python 3.
    print("%s %s" % ("In : ", lists))
    print("%s %s" % ("Out : ", out))
    assert (out == expect)
In : [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [1], [1, 2, 3, 4], [1, 2], [17, 18, 19, 22, 41, 48], [2, 3], [1, 2, 3], [50, 69], [1, 2, 3], [2, 3, 21], [1, 2, 3], [1, 2, 4, 8], [1, 2, 4, 5, 6]]
Out : [[2, 16, 17], [1, 2, 3, 4, 5, 6, 7], [17, 18, 19, 22, 41, 48], [50, 69], [2, 3, 21], [1, 2, 4, 8], [1, 2, 4, 5, 6]]
def is_sublist_of_any_list(cand, lists):
    """Return True if cand occurs as a contiguous run inside any list of lists.

    Both sides are rendered as ",a,b,c," strings and compared with a substring
    test, so items are matched by their str() form.  Lists with fewer items
    than cand are skipped.
    """
    def comma_list(l):
        # Leading and trailing commas keep "1" from matching inside "11".
        return "," + ",".join(str(x) for x in l) + ","

    # Take the item count before cand is turned into a string: the original
    # compared the string's length with the target's item count and therefore
    # skipped targets that did contain the candidate.
    cand_len = len(cand)
    cand_text = comma_list(cand)
    return any(cand_text in comma_list(target)
               for target in lists if cand_len <= len(target))


def super_lists(lists):
    """Return the lists that are not a contiguous sub-list of another entry.

    When several entries are equal, the first copy is not compared against the
    later copies, so one copy survives unless a different list contains it.
    """
    result = []
    for i, cand in enumerate(lists):
        others = [other for j, other in enumerate(lists)
                  if j != i and not (j > i and other == cand)]
        if not is_sublist_of_any_list(cand, others):
            result.append(cand)
    return result