Python 用另一个文件的行替换文件的行_Python_Numpy

Python 用另一个文件的行替换文件的行

python numpy

Python 用另一个文件的行替换文件的行,python,numpy,Python,Numpy,我有两个.txt格式的点云文件（场景和绿色）。场景点云通常包含100000多条线，例如，绿色点云包含20000条线。这两个文件的绿点线相等，但最后一个数字是每个点的标签场景： 0.805309, -3.43696, 6.85463, 0, 0, 0, 5 0.811636, -3.42248, 6.82576, 0, 0, 0, 5 -1.00663, 0.0985967, 3.02769, 42, 134, 83, 5 -1.00182, 0.098547, 3.02617, 43, 133

我有两个.txt格式的点云文件（场景和绿色）。场景点云通常包含100000多条线，例如，绿色点云包含20000条线。这两个文件的绿点线相等，但最后一个数字是每个点的标签

场景：

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

import os
import fileinput
import numpy

def main(scene1, others):

    pointcloud = []
    scene1 = open(scene1,"r+")
    scene = []
    for each_point in scene1:
        scene.append(each_point)

    for file in others:
        other = open(file, "r+")
        for line in other:
            pointcloud = []
            line1 = line[:-3]
            for sceneLine in scene:
                sceneLine1 = sceneLine[:-3]
                if sceneLine1 == line1:
                    pointcloud.append(line)
                else:
                    pointcloud.append(sceneLine)
            scene = pointcloud

    with open('pointcloud.txt', 'w') as points:
        for item in scene:
            points.write("%s" % item)


others = []
for file in os.listdir("./"):
    if file.endswith(".txt"):
        if file.startswith("pointCloudScene9863Cl"):
            scene = file
        else:
            others.append(file)

main(scene,others)

绿色：

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

import os
import fileinput
import numpy

def main(scene1, others):

    pointcloud = []
    scene1 = open(scene1,"r+")
    scene = []
    for each_point in scene1:
        scene.append(each_point)

    for file in others:
        other = open(file, "r+")
        for line in other:
            pointcloud = []
            line1 = line[:-3]
            for sceneLine in scene:
                sceneLine1 = sceneLine[:-3]
                if sceneLine1 == line1:
                    pointcloud.append(line)
                else:
                    pointcloud.append(sceneLine)
            scene = pointcloud

    with open('pointcloud.txt', 'w') as points:
        for item in scene:
            points.write("%s" % item)


others = []
for file in os.listdir("./"):
    if file.endswith(".txt"):
        if file.startswith("pointCloudScene9863Cl"):
            scene = file
        else:
            others.append(file)

main(scene,others)

我希望将场景绿点中的整条线替换为绿色文件中的等号线，或者仅将标签编号从5更改为3，只要两条线相等。最终结果如下： 场景：

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

import os
import fileinput
import numpy

def main(scene1, others):

    pointcloud = []
    scene1 = open(scene1,"r+")
    scene = []
    for each_point in scene1:
        scene.append(each_point)

    for file in others:
        other = open(file, "r+")
        for line in other:
            pointcloud = []
            line1 = line[:-3]
            for sceneLine in scene:
                sceneLine1 = sceneLine[:-3]
                if sceneLine1 == line1:
                    pointcloud.append(line)
                else:
                    pointcloud.append(sceneLine)
            scene = pointcloud

    with open('pointcloud.txt', 'w') as points:
        for item in scene:
            points.write("%s" % item)


others = []
for file in os.listdir("./"):
    if file.endswith(".txt"):
        if file.startswith("pointCloudScene9863Cl"):
            scene = file
        else:
            others.append(file)

main(scene,others)

我已经编写了两种类型的代码来实现这一点，但是它们都会加载很长时间，这一点都不好，因为我有很多文件要修改。第一个代码：

import os
import fileinput
def main(scene, others):

    for file in others:
        other = open(file, "r+")
        for line in other:
            line1 = line[:-3]
            f=scene
            for sceneLine in fileinput.input(f,inplace=True):
                new = sceneLine
                sceneLine1 = sceneLine[:-3]
                if sceneLine1 == line1:
                    print(sceneLine.replace(new, line), end='')
                else:
                    print(sceneLine.replace(line,line), end='')
            fileinput.close()


others = []
for file in os.listdir("./"):
    if file.endswith(".txt"):
        if file.startswith("pointCloudScene9863Cl"):
            scene = file
        else:
            others.append(file)

main(scene,others)

第二个代码：

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 5
-1.00182, 0.098547, 3.02617, 43, 133, 83, 5
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3

0.805309, -3.43696, 6.85463, 0, 0, 0, 5
0.811636, -3.42248, 6.82576, 0, 0, 0, 5
-1.00663, 0.0985967, 3.02769, 42, 134, 83, 3
-1.00182, 0.098547, 3.02617, 43, 133, 83, 3
-0.997052, 0.0985018, 3.02478, 41, 133, 82, 3
0.811636, -3.42248, 6.82576, 0, 0, 0, 5

import os
import fileinput
import numpy

def main(scene1, others):

    pointcloud = []
    scene1 = open(scene1,"r+")
    scene = []
    for each_point in scene1:
        scene.append(each_point)

    for file in others:
        other = open(file, "r+")
        for line in other:
            pointcloud = []
            line1 = line[:-3]
            for sceneLine in scene:
                sceneLine1 = sceneLine[:-3]
                if sceneLine1 == line1:
                    pointcloud.append(line)
                else:
                    pointcloud.append(sceneLine)
            scene = pointcloud

    with open('pointcloud.txt', 'w') as points:
        for item in scene:
            points.write("%s" % item)


others = []
for file in os.listdir("./"):
    if file.endswith(".txt"):
        if file.startswith("pointCloudScene9863Cl"):
            scene = file
        else:
            others.append(file)

main(scene,others)

这两种方法都适用于少量点，但当我使用原始点云文件时，需要30分钟甚至更长时间才能完成工作。实际上，当我基本上使用嵌套循环时，我在FOR循环中看到了问题，这意味着我将有100000*20000个循环来更改绿点

使用numpy数组或任何其他方法是否有一种有效的方法

我认为你应该问自己一些关于数据的基本问题：

文件中的顺序是否保留？我的意思是，您是否必须始终搜索整个文件，或者在某个位置找到绿点后，您可以跳过文件某些部分的比较

10万张唱片并不多。会不会比现在多1000倍？您能否将整个文件一次性读入内存（Numpy数组或数据帧），这样就可以使用RAM和CPU缓存，而不是从磁盘多次读取？在最近发现的绿点上设置偏移量将是一个可行的选择

使用

numba

jit编译的“暴力”解决方案。只是为了好玩，最好用这个。最昂贵的操作是在

mod_arr[j，：]=mod[i，：]

期间的内存IO

import timeit
将numpy作为np导入
来自numba import njit
###嵌套循环的numba njit版本
@njit
def修改（arr、mod、tol=0.00000000 1）：
mod_arr=arr[：]
mask=np.one（arr.shape[0]）.astype（np.bool_u2;）
idx=np.arange（0，arr.shape[0]，1）
对于范围内的i（模形状[0]）：
对于idx[掩码]中的j：
如果np.绝对（np.和（arr[j，：-1]-mod[i，：-1]）
[真的，真的，真的，真的，真的，真的]
#现在让我们将阵列放大。。。
a=np.瓷砖（a，（17000，1））#a.形状为（102000，7）
m=np.瓷砖（m，（7000，1））#m.形状为（21000，7）
###性能检查：
%修改时间（a，m）
# -->
每个回路2min 55s±4.07 s（7次运行的平均值±标准偏差，每个回路1次）

我有一个应该足够的解决方案，但在此之前，我有一个免责声明：如果没有您提供的更多信息，就不可能找到合适的解决方案。我们需要这个问题的背景，以及关于数据格式和您正在尝试做什么的更精确和详细的信息

例如，比较浮点数是否相等感觉不太好，通常数字的操作在精度等方面总是有一定的风险。因为这些点似乎来自同一个地方，如果每个点都有某种可以用来检查相等性的唯一ID，那就太好了

和这里的其他一些人一样，我的第一反应就是抓到了努比和熊猫。这对我来说是个错误，因为这项任务根本不涉及太多的数据操作或转换

下面是我现在能想到的最简单的实现：

def point_parse(line):
    line_point = line.split(", ")
    line_point[0] = float(line_point[0])
    line_point[1] = float(line_point[1])
    line_point[2] = float(line_point[2])
    line_point[3] = int(line_point[3])
    line_point[4] = int(line_point[4])
    line_point[5] = int(line_point[5])
    line_point[6] = int(line_point[6])
    return tuple(line_point)

green_points_set: frozenset
black_points_set: frozenset

with open("../resources/Green_long.txt", "r") as green_file:
    green_points_set = frozenset((point_parse(line)[:-1] for line in green_file))

with open("../resources/Black_long.txt", "r") as black_file:
    black_points_set = frozenset((point_parse(line)[:-1] for line in black_file))

def set_point_label(point):
    point_comp = point[:-1]
    if point_comp in green_points_set:
        point_comp += (3,)
    elif point_comp in black_points_set:
        point_comp += (4,)
    else:
        point_comp = point
    return point_comp

with open("../resources/Scene_long.txt", "r") as scene_file:
    scene_points_new = (set_point_label(point_parse(line)) for line in scene_file)
    form_lines = ((f"{res_line[0]}, {res_line[1]}, {res_line[2]}, {res_line[3]}, "
               f"{res_line[4]}, {res_line[5]}, {res_line[6]}\n") for res_line in scene_points_new)

    with open("../out/Scene_out.txt", "w") as scene_out:
        scene_out.writelines(form_lines)

代码非常简单。为绿点和黑点创建集合，我们测试成员资格，并适当更改标签

我为自己创建了一些训练数据：一个总共有1000000点、125000个绿点和125000个黑点的场景。运行时间不足7秒（希望我没有犯任何严重错误！），内存使用应该很低。

我认为基本上最好将两个文件内容加载到数组中（100k和20k行不太多），比较数组以创建所需的输出数组，然后保存输出。感谢您的回答，将文件加载到列表中与使用numpy数组将文件加载到内存中不同？我的意思是，如果我没有错的话，python list和numpy数组都会将数据加载到内存中。我特别要说的是fileinput中的sceneLine的

。input（f，inplace=True）：

或

场景中的sceneLine:

。您需要所有这些迭代，还是可以限制它？将数据保存在数组或数据帧中可以通过将当前数据保存在