Python 用另一个文件的行替换文件的行
我有两个.txt格式的点云文件(场景和绿色)。场景点云通常包含100000多条线,例如,绿色点云包含20000条线。这两个文件的绿点线相等,但最后一个数字是每个点的标签 场景:Python 用另一个文件的行替换文件的行,python,numpy,Python,Numpy,我有两个.txt格式的点云文件(场景和绿色)。场景点云通常包含100000多条线,例如,绿色点云包含20000条线。这两个文件的绿点线相等,但最后一个数字是每个点的标签 场景: 0.805309, -3.43696, 6.85463, 0, 0, 0, 5 0.811636, -3.42248, 6.82576, 0, 0, 0, 5 -1.00663, 0.0985967, 3.02769, 42, 134, 83, 5 -1.00182, 0.098547, 3.02617, 43, 133
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
import os
import fileinput
import numpy
def main(scene1, others):
pointcloud = []
scene1 = open(scene1,"r+")
scene = []
for each_point in scene1:
scene.append(each_point)
for file in others:
other = open(file, "r+")
for line in other:
pointcloud = []
line1 = line[:-3]
for sceneLine in scene:
sceneLine1 = sceneLine[:-3]
if sceneLine1 == line1:
pointcloud.append(line)
else:
pointcloud.append(sceneLine)
scene = pointcloud
with open('pointcloud.txt', 'w') as points:
for item in scene:
points.write("%s" % item)
others = []
for file in os.listdir("./"):
if file.endswith(".txt"):
if file.startswith("pointCloudScene9863Cl"):
scene = file
else:
others.append(file)
main(scene,others)
绿色:
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
import os
import fileinput
import numpy
def main(scene1, others):
pointcloud = []
scene1 = open(scene1,"r+")
scene = []
for each_point in scene1:
scene.append(each_point)
for file in others:
other = open(file, "r+")
for line in other:
pointcloud = []
line1 = line[:-3]
for sceneLine in scene:
sceneLine1 = sceneLine[:-3]
if sceneLine1 == line1:
pointcloud.append(line)
else:
pointcloud.append(sceneLine)
scene = pointcloud
with open('pointcloud.txt', 'w') as points:
for item in scene:
points.write("%s" % item)
others = []
for file in os.listdir("./"):
if file.endswith(".txt"):
if file.startswith("pointCloudScene9863Cl"):
scene = file
else:
others.append(file)
main(scene,others)
我希望将场景绿点中的整条线替换为绿色文件中的等号线,或者仅将标签编号从5更改为3,只要两条线相等。最终结果如下:
场景:
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
import os
import fileinput
import numpy
def main(scene1, others):
pointcloud = []
scene1 = open(scene1,"r+")
scene = []
for each_point in scene1:
scene.append(each_point)
for file in others:
other = open(file, "r+")
for line in other:
pointcloud = []
line1 = line[:-3]
for sceneLine in scene:
sceneLine1 = sceneLine[:-3]
if sceneLine1 == line1:
pointcloud.append(line)
else:
pointcloud.append(sceneLine)
scene = pointcloud
with open('pointcloud.txt', 'w') as points:
for item in scene:
points.write("%s" % item)
others = []
for file in os.listdir("./"):
if file.endswith(".txt"):
if file.startswith("pointCloudScene9863Cl"):
scene = file
else:
others.append(file)
main(scene,others)
我已经编写了两种类型的代码来实现这一点,但是它们都会加载很长时间,这一点都不好,因为我有很多文件要修改。
第一个代码:
import os
import fileinput
def main(scene, others):
for file in others:
other = open(file, "r+")
for line in other:
line1 = line[:-3]
f=scene
for sceneLine in fileinput.input(f,inplace=True):
new = sceneLine
sceneLine1 = sceneLine[:-3]
if sceneLine1 == line1:
print(sceneLine.replace(new, line), end='')
else:
print(sceneLine.replace(line,line), end='')
fileinput.close()
others = []
for file in os.listdir("./"):
if file.endswith(".txt"):
if file.startswith("pointCloudScene9863Cl"):
scene = file
else:
others.append(file)
main(scene,others)
第二个代码:
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
import os
import fileinput
import numpy
def main(scene1, others):
pointcloud = []
scene1 = open(scene1,"r+")
scene = []
for each_point in scene1:
scene.append(each_point)
for file in others:
other = open(file, "r+")
for line in other:
pointcloud = []
line1 = line[:-3]
for sceneLine in scene:
sceneLine1 = sceneLine[:-3]
if sceneLine1 == line1:
pointcloud.append(line)
else:
pointcloud.append(sceneLine)
scene = pointcloud
with open('pointcloud.txt', 'w') as points:
for item in scene:
points.write("%s" % item)
others = []
for file in os.listdir("./"):
if file.endswith(".txt"):
if file.startswith("pointCloudScene9863Cl"):
scene = file
else:
others.append(file)
main(scene,others)
这两种方法都适用于少量点,但当我使用原始点云文件时,需要30分钟甚至更长时间才能完成工作。实际上,当我基本上使用嵌套循环时,我在FOR循环中看到了问题,这意味着我将有100000*20000个循环来更改绿点
使用numpy数组或任何其他方法是否有一种有效的方法 我认为你应该问自己一些关于数据的基本问题:
使用
numba
jit编译的“暴力”解决方案。只是为了好玩,最好用这个。最昂贵的操作是在mod_arr[j,:]=mod[i,:]
期间的内存IO
import timeit
将numpy作为np导入
来自numba import njit
###嵌套循环的numba njit版本
@njit
def修改(arr、mod、tol=0.00000000 1):
mod_arr=arr[:]
mask=np.one(arr.shape[0]).astype(np.bool_u2;)
idx=np.arange(0,arr.shape[0],1)
对于范围内的i(模形状[0]):
对于idx[掩码]中的j:
如果np.绝对(np.和(arr[j,:-1]-mod[i,:-1])
[真的,真的,真的,真的,真的,真的]
#现在让我们将阵列放大。。。
a=np.瓷砖(a,(17000,1))#a.形状为(102000,7)
m=np.瓷砖(m,(7000,1))#m.形状为(21000,7)
###性能检查:
%修改时间(a,m)
# -->
每个回路2min 55s±4.07 s(7次运行的平均值±标准偏差,每个回路1次)
我有一个应该足够的解决方案,但在此之前,我有一个免责声明:如果没有您提供的更多信息,就不可能找到合适的解决方案。我们需要这个问题的背景,以及关于数据格式和您正在尝试做什么的更精确和详细的信息
例如,比较浮点数是否相等感觉不太好,通常数字的操作在精度等方面总是有一定的风险。因为这些点似乎来自同一个地方,如果每个点都有某种可以用来检查相等性的唯一ID,那就太好了
和这里的其他一些人一样,我的第一反应就是抓到了努比和熊猫。这对我来说是个错误,因为这项任务根本不涉及太多的数据操作或转换 下面是我现在能想到的最简单的实现:
def point_parse(line):
line_point = line.split(", ")
line_point[0] = float(line_point[0])
line_point[1] = float(line_point[1])
line_point[2] = float(line_point[2])
line_point[3] = int(line_point[3])
line_point[4] = int(line_point[4])
line_point[5] = int(line_point[5])
line_point[6] = int(line_point[6])
return tuple(line_point)
green_points_set: frozenset
black_points_set: frozenset
with open("../resources/Green_long.txt", "r") as green_file:
green_points_set = frozenset((point_parse(line)[:-1] for line in green_file))
with open("../resources/Black_long.txt", "r") as black_file:
black_points_set = frozenset((point_parse(line)[:-1] for line in black_file))
def set_point_label(point):
point_comp = point[:-1]
if point_comp in green_points_set:
point_comp += (3,)
elif point_comp in black_points_set:
point_comp += (4,)
else:
point_comp = point
return point_comp
with open("../resources/Scene_long.txt", "r") as scene_file:
scene_points_new = (set_point_label(point_parse(line)) for line in scene_file)
form_lines = ((f"{res_line[0]}, {res_line[1]}, {res_line[2]}, {res_line[3]}, "
f"{res_line[4]}, {res_line[5]}, {res_line[6]}\n") for res_line in scene_points_new)
with open("../out/Scene_out.txt", "w") as scene_out:
scene_out.writelines(form_lines)
代码非常简单。为绿点和黑点创建集合,我们测试成员资格,并适当更改标签
我为自己创建了一些训练数据:一个总共有1000000点、125000个绿点和125000个黑点的场景。运行时间不足7秒(希望我没有犯任何严重错误!),内存使用应该很低。我认为基本上最好将两个文件内容加载到数组中(100k和20k行不太多),比较数组以创建所需的输出数组,然后保存输出。感谢您的回答,将文件加载到列表中与使用numpy数组将文件加载到内存中不同?我的意思是,如果我没有错的话,python list和numpy数组都会将数据加载到内存中。我特别要说的是fileinput中的sceneLine的
。input(f,inplace=True):
或场景中的sceneLine:
。您需要所有这些迭代,还是可以限制它?将数据保存在数组或数据帧中可以通过将当前数据保存在