python列表迭代,concat性能问题
我用两种方法编写了一个简单的插入排序函数:
def binary_search(the_array, item, start, end):
    """Find an insertion index for ``item`` within ``the_array[start..end]``.

    The inclusive index range ``[start, end]`` is expected to be sorted in
    ascending order. If a probed element compares neither smaller nor larger
    than ``item``, the index of that element is returned immediately.

    Args:
        the_array: Indexable sequence to search.
        item: Value whose insertion position is wanted.
        start: First index of the search range (inclusive).
        end: Last index of the search range (inclusive); a value below
            ``start`` denotes an empty range.

    Returns:
        An integer index at which ``item`` can be inserted.
    """
    low, high = start, end
    while True:
        if low == high:
            # One candidate left: go before it, or directly after it.
            return low if the_array[low] > item else low + 1
        if low > high:
            # Empty range: ``low`` is the insertion point.
            return low
        # Midpoint via float halving + round(), i.e. .5 ties go to the even index.
        middle = round((low + high) / 2)
        probe = the_array[middle]
        if probe < item:
            low = middle + 1
        elif probe > item:
            high = middle - 1
        else:
            return middle
def insertion_sort(the_array, way="A"):
    """Sort ``the_array`` with binary insertion sort and print the CPU time used.

    Both insertion strategies used to be commented out, which meant the
    position found by the binary search was thrown away and the input was
    returned unsorted. They are now selectable through ``way``.

    Args:
        the_array: List of mutually comparable items.
        way: ``"A"`` (default) shifts elements one slot at a time and sorts
            the list in place. ``"B"`` rebuilds the list from slices on each
            step, rebinding the local name instead of mutating the caller's
            list.

    Returns:
        The sorted list. With way A this is the list that was passed in.

    Raises:
        ValueError: If ``way`` is neither ``"A"`` nor ``"B"``.
    """
    if way not in ("A", "B"):
        raise ValueError("way must be 'A' or 'B', got {!r}".format(way))

    length = len(the_array)
    start = time.process_time()
    for index in range(1, length):
        value = the_array[index]
        pos = binary_search(the_array, value, 0, index - 1)
        if way == "A":
            # Way A: move every element in [pos, index) one slot to the right.
            for p in range(index, pos, -1):
                the_array[p] = the_array[p - 1]
            the_array[pos] = value
        else:
            # Way B: build a new list with ``value`` moved from index to pos.
            the_array = (
                the_array[:pos] + [value] + the_array[pos:index] + the_array[index + 1:]
            )
    end = time.process_time()
    print("Cost time:", end - start)
    return the_array
B:
我不太擅长 Python。在我的性能测试中,方式 B 比方式 A 快。我使用 Python 的 time.process_time() 来测量耗时。
那么为什么B比A快呢?请帮帮我。谢谢
更新:
我使用10000个随机整数来测试A和B
# Append 10000 random integers, each drawn from 0..10000 inclusive, to `data`.
# NOTE(review): `data` must already exist as a list; it is defined outside this fragment.
for x in range(0,10000):
    data.append(random.randint(0,10000))
方式 A 耗时 2.3125 秒,方式 B 耗时 0.890625 秒。
一天之后,仍然没有答案能解释原因,所以我决定读一本相关的书。在《高性能 Python》中,我找到了答案!如果你想知道,可以看我自己的回答。
假设方法 A 和方法 B 给出相同的结果:方法 A 必须在一个范围内迭代,而方法 B 只是一次简单的赋值,因此方法 B 当然会更快。
方法 A 是在现有 for 循环下的嵌套 for 循环(最坏情况可能是 O(n²))。
方法 B 是在一个 for 循环下的直接赋值:
# Way B: build a new list in which `value` (originally at `index`) sits at `pos`,
# using slices and concatenation, and rebind `the_array` to it.
the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:]
如果不运行这个函数,我假设这两个函数给出的结果是相同的——但是与直接赋值相比,在一个范围内迭代总是比较慢。为什么在一个范围内迭代要比列表拼接慢?原因有二:
# Copy the elements in [pos, index) one slot to the right with a single slice
# assignment, then store `value` in the slot at `pos`.
the_array[pos+1:index+1] = the_array[pos:index]
the_array[pos] = value
它比 A 和 B 都快,而且仍然非常容易理解。
Python 是一种非常高级的解释型语言。作为简单性和可读性的代价,像迭代 range 生成器这样的琐碎任务也可能带来可观的开销。
相比之下,列表推导式和切片的实现性能很高。
尽管它们在复杂度上只相差一个常数因子,但实际运行可以快得多:
import random
import time
def binary_search(the_array, item, start, end):
    """Find an insertion index for ``item`` within ``the_array[start..end]``.

    The inclusive index range ``[start, end]`` is expected to be sorted in
    ascending order. If a probed element compares neither smaller nor larger
    than ``item``, the index of that element is returned immediately.

    Args:
        the_array: Indexable sequence to search.
        item: Value whose insertion position is wanted.
        start: First index of the search range (inclusive).
        end: Last index of the search range (inclusive); a value below
            ``start`` denotes an empty range.

    Returns:
        An integer index at which ``item`` can be inserted.
    """
    low, high = start, end
    while True:
        if low == high:
            # One candidate left: go before it, or directly after it.
            return low if the_array[low] > item else low + 1
        if low > high:
            # Empty range: ``low`` is the insertion point.
            return low
        # Midpoint via float halving + round(), i.e. .5 ties go to the even index.
        middle = round((low + high) / 2)
        probe = the_array[middle]
        if probe < item:
            low = middle + 1
        elif probe > item:
            high = middle - 1
        else:
            return middle
def insertion_sort_a(the_array):
    """Binary insertion sort that shifts elements one at a time (way A).

    Sorts ``the_array`` in place, prints the elapsed process time prefixed by
    ``Cost time:`` and followed by a tab, and returns the same list.

    Args:
        the_array: Mutable list of mutually comparable items.

    Returns:
        ``the_array`` itself, now sorted.
    """
    size = len(the_array)
    started = time.process_time()
    for current in range(1, size):
        pending = the_array[current]
        target = binary_search(the_array, pending, 0, current - 1)
        # Walk down from the current slot to the target, pulling each left
        # neighbour one position to the right.
        slot = current
        while slot > target:
            the_array[slot] = the_array[slot - 1]
            slot -= 1
        the_array[target] = pending
    finished = time.process_time()
    print("Cost time:", finished - started, end="\t")
    return the_array
def insertion_sort_b(the_array):
    """Binary insertion sort that rebuilds the list by concatenation (way B).

    Every step creates a fresh list from slices, so the list object passed in
    is not modified. Prints the elapsed process time followed by a tab.

    Args:
        the_array: List of mutually comparable items.

    Returns:
        The sorted list.
    """
    size = len(the_array)
    started = time.process_time()
    current = 1
    while current < size:
        pending = the_array[current]
        target = binary_search(the_array, pending, 0, current - 1)
        before = the_array[:target]          # already in final relative order
        shifted = the_array[target:current]  # elements that move right by one
        after = the_array[current + 1:]      # not yet processed
        the_array = before + [pending] + shifted + after
        current += 1
    finished = time.process_time()
    print(finished - started, end="\t")
    return the_array
def insertion_sort_c(the_array):
    """Classic insertion sort with a linear backwards scan (way C).

    Sorts ``the_array`` in place without a binary search, prints the elapsed
    process time followed by a tab, and returns the same list.

    Args:
        the_array: Mutable list of mutually comparable items.

    Returns:
        ``the_array`` itself, now sorted.
    """
    size = len(the_array)
    started = time.process_time()
    for current in range(1, size):
        pending = the_array[current]
        hole = current
        # Slide larger elements right until the hole reaches the spot
        # where ``pending`` belongs.
        while hole > 0:
            left = the_array[hole - 1]
            if not (left > pending):
                break
            the_array[hole] = left
            hole -= 1
        the_array[hole] = pending
    finished = time.process_time()
    print(finished - started, end="\t")
    return the_array
def insertion_sort_d(the_array):
    """Binary insertion sort that shifts with one slice assignment (way D).

    Sorts ``the_array`` in place, prints the elapsed process time on its own
    line, and returns the same list.

    Args:
        the_array: Mutable list of mutually comparable items.

    Returns:
        ``the_array`` itself, now sorted.
    """
    size = len(the_array)
    started = time.process_time()
    for current in range(1, size):
        pending = the_array[current]
        target = binary_search(the_array, pending, 0, current - 1)
        # Move the block [target, current) one slot right in a single
        # slice assignment, then fill the vacated slot.
        moved = the_array[target:current]
        the_array[target + 1:current + 1] = moved
        the_array[target] = pending
    finished = time.process_time()
    print(finished - started)
    return the_array
# Benchmark driver: for sizes 2**0 .. 2**19, sort the same random data with all
# four variants, check each result is ordered, and check the results agree.
# NOTE(review): the largest sizes may take a very long time with these sorts.
for exponent in range(20):
    n = 2 ** exponent
    data = [random.randint(0, n) for _ in range(n)]

    # Each variant gets its own copy so in-place sorting cannot affect the others.
    a = insertion_sort_a(list(data))
    assert all(left <= right for left, right in zip(a, a[1:]))
    b = insertion_sort_b(list(data))
    assert all(left <= right for left, right in zip(b, b[1:]))
    c = insertion_sort_c(list(data))
    assert all(left <= right for left, right in zip(c, c[1:]))
    d = insertion_sort_d(list(data))
    assert all(left <= right for left, right in zip(d, d[1:]))

    assert a == b == c == d
随机导入
导入时间
def二进制搜索(搜索数组、项目、开始、结束):
如果开始=结束:
如果_数组[start]>项:
回程起动
其他:
返回开始+1
如果开始>结束:
回程起动
中间=圆形((开始+结束)/2)
如果_数组[mid]<项:
返回二进制搜索(搜索数组,项,中间+1,结束)
elif_数组[mid]>项:
返回二进制搜索(数组、项、开始、mid-1)
其他:
中途返回
def插入_排序_a(_数组):
l=len(_数组)
开始=时间。处理时间()
对于范围(1,l)中的索引:
值=_数组[索引]
pos=二进制搜索(数组,值,0,索引-1)
对于范围内的p(索引,位置-1):
_数组[p]=_数组[p-1]
_数组[pos]=值
结束=时间。处理时间()
打印(“成本时间:”,结束开始,结束=“\t”)
返回_数组
def插入_排序_b(_数组):
l=len(_数组)
开始=时间。处理时间()
对于范围(1,l)中的索引:
值=_数组[索引]
pos=二进制搜索(数组,值,0,索引-1)
_数组=_数组[:pos]+[value]+_数组[pos:index]+_数组[index+1:]
结束=时间。处理时间()
打印(结束开始,结束=“\t”)
返回_数组
def插入_排序_c(_数组):
l=len(_数组)
开始=时间。处理时间()
对于范围(1,l)中的索引:
值=_数组[索引]
当索引>0且_数组[index-1]>值:
_数组[index]=_数组[index-1]
索引-=1
_数组[索引]=值
结束=时间。处理时间()
打印(结束开始,结束=“\t”)
返回_数组
def插入_排序_d(_数组):
l=len(_数组)
开始=时间。处理时间()
对于范围(1,l)中的索引:
值=_数组[索引]
pos=二进制搜索(数组,值,0,索引-1)
_数组[pos+1:index+1]=_数组[pos:index]
_数组[pos]=值
结束=时间。处理时间()
打印(结束-开始)
返回_数组
对于范围(20)内的n:
n=2**n
数据=[]
对于范围(0,n)内的x:
data.append(random.randint(0,n))
a=插入\排序\ a(列表(数据))
断言全部(a[i]
请修复您的缩进。
目标是什么?是要在已排序的数组中编写一个 insert 方法并保持其有序吗?
@AzatIbrakov 已修复。
@B.Almeida 抱歉,回复太晚。我在列表中放入随机整数并对它们进行排序,然后测试性能。
我测试了代码:insertion_sort_c 比其他所有方法都慢,insertion_sort_d 是最快的。所以原因并不是 range 生成器耗费时间。真正的原因见我的答案。
# Way B: build a new list in which `value` (originally at `index`) sits at `pos`,
# using slices and concatenation, and rebind `the_array` to it.
the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:]
# Copy the elements in [pos, index) one slot to the right with a single slice
# assignment, then store `value` in the slot at `pos`.
the_array[pos+1:index+1] = the_array[pos:index]
the_array[pos] = value
import random
import time
def binary_search(the_array, item, start, end):
    """Find an insertion index for ``item`` within ``the_array[start..end]``.

    The inclusive index range ``[start, end]`` is expected to be sorted in
    ascending order. If a probed element compares neither smaller nor larger
    than ``item``, the index of that element is returned immediately.

    Args:
        the_array: Indexable sequence to search.
        item: Value whose insertion position is wanted.
        start: First index of the search range (inclusive).
        end: Last index of the search range (inclusive); a value below
            ``start`` denotes an empty range.

    Returns:
        An integer index at which ``item`` can be inserted.
    """
    low, high = start, end
    while True:
        if low == high:
            # One candidate left: go before it, or directly after it.
            return low if the_array[low] > item else low + 1
        if low > high:
            # Empty range: ``low`` is the insertion point.
            return low
        # Midpoint via float halving + round(), i.e. .5 ties go to the even index.
        middle = round((low + high) / 2)
        probe = the_array[middle]
        if probe < item:
            low = middle + 1
        elif probe > item:
            high = middle - 1
        else:
            return middle
def insertion_sort_a(the_array):
    """Binary insertion sort that shifts elements one at a time (way A).

    Sorts ``the_array`` in place, prints the elapsed process time prefixed by
    ``Cost time:`` and followed by a tab, and returns the same list.

    Args:
        the_array: Mutable list of mutually comparable items.

    Returns:
        ``the_array`` itself, now sorted.
    """
    size = len(the_array)
    started = time.process_time()
    for current in range(1, size):
        pending = the_array[current]
        target = binary_search(the_array, pending, 0, current - 1)
        # Walk down from the current slot to the target, pulling each left
        # neighbour one position to the right.
        slot = current
        while slot > target:
            the_array[slot] = the_array[slot - 1]
            slot -= 1
        the_array[target] = pending
    finished = time.process_time()
    print("Cost time:", finished - started, end="\t")
    return the_array
def insertion_sort_b(the_array):
    """Binary insertion sort that rebuilds the list by concatenation (way B).

    Every step creates a fresh list from slices, so the list object passed in
    is not modified. Prints the elapsed process time followed by a tab.

    Args:
        the_array: List of mutually comparable items.

    Returns:
        The sorted list.
    """
    size = len(the_array)
    started = time.process_time()
    current = 1
    while current < size:
        pending = the_array[current]
        target = binary_search(the_array, pending, 0, current - 1)
        before = the_array[:target]          # already in final relative order
        shifted = the_array[target:current]  # elements that move right by one
        after = the_array[current + 1:]      # not yet processed
        the_array = before + [pending] + shifted + after
        current += 1
    finished = time.process_time()
    print(finished - started, end="\t")
    return the_array
def insertion_sort_c(the_array):
    """Classic insertion sort with a linear backwards scan (way C).

    Sorts ``the_array`` in place without a binary search, prints the elapsed
    process time followed by a tab, and returns the same list.

    Args:
        the_array: Mutable list of mutually comparable items.

    Returns:
        ``the_array`` itself, now sorted.
    """
    size = len(the_array)
    started = time.process_time()
    for current in range(1, size):
        pending = the_array[current]
        hole = current
        # Slide larger elements right until the hole reaches the spot
        # where ``pending`` belongs.
        while hole > 0:
            left = the_array[hole - 1]
            if not (left > pending):
                break
            the_array[hole] = left
            hole -= 1
        the_array[hole] = pending
    finished = time.process_time()
    print(finished - started, end="\t")
    return the_array
def insertion_sort_d(the_array):
    """Binary insertion sort that shifts with one slice assignment (way D).

    Sorts ``the_array`` in place, prints the elapsed process time on its own
    line, and returns the same list.

    Args:
        the_array: Mutable list of mutually comparable items.

    Returns:
        ``the_array`` itself, now sorted.
    """
    size = len(the_array)
    started = time.process_time()
    for current in range(1, size):
        pending = the_array[current]
        target = binary_search(the_array, pending, 0, current - 1)
        # Move the block [target, current) one slot right in a single
        # slice assignment, then fill the vacated slot.
        moved = the_array[target:current]
        the_array[target + 1:current + 1] = moved
        the_array[target] = pending
    finished = time.process_time()
    print(finished - started)
    return the_array
# Benchmark driver: for sizes 2**0 .. 2**19, sort the same random data with all
# four variants, check each result is ordered, and check the results agree.
# NOTE(review): the largest sizes may take a very long time with these sorts.
for exponent in range(20):
    n = 2 ** exponent
    data = [random.randint(0, n) for _ in range(n)]

    # Each variant gets its own copy so in-place sorting cannot affect the others.
    a = insertion_sort_a(list(data))
    assert all(left <= right for left, right in zip(a, a[1:]))
    b = insertion_sort_b(list(data))
    assert all(left <= right for left, right in zip(b, b[1:]))
    c = insertion_sort_c(list(data))
    assert all(left <= right for left, right in zip(c, c[1:]))
    d = insertion_sort_d(list(data))
    assert all(left <= right for left, right in zip(d, d[1:]))

    assert a == b == c == d