python读取txt文件缺少数据_Python_Csv_Pandas

python读取txt文件缺少数据

python csv pandas

python读取txt文件缺少数据,python,csv,pandas,Python,Csv,Pandas,这是从终端输出的 8677 (8637, 2) 这是代码，不知道为什么有些数据丢失了？在random_list.txt中找不到img_list.txt的最后40个条目。我错过了什么 import os import argparse import math import pandas as pd import numpy as np example_dirname = os.path.abspath(os.path.dirname(__file__)) caffe_dirname = os

终端输出如下：8677 和 (8637, 2)。下面是代码，不知道为什么有些数据丢失了：img_list.txt 的最后 40 个条目在 randomlist.txt 中找不到。我漏掉了什么？

import os
import argparse
import math
import pandas as pd 
import numpy as np

# Directory that contains this script.
example_dirname = os.path.abspath(os.path.dirname(__file__))

# Two levels above the script directory (presumably the Caffe checkout root --
# confirm against the actual repository layout).
caffe_dirname = os.path.abspath(os.path.join(example_dirname,'../..'))
training_dirname = os.path.join(caffe_dirname,'data/101')
# Directory expected to hold one sub-folder per image class.
img_dirname = os.path.join(caffe_dirname,'data/101/101_ObjectCategories')

# Script entry point: builds a class index and an image list from the
# per-class folders under img_dirname, then round-trips the image list
# through pandas into "randomlist.txt".
if __name__ == '__main__':
    # Single positional argument: the fraction of images meant for training.
    # NOTE(review): argparse ignores `default` for a positional argument
    # unless nargs='?' is given, so the value is always required.
    parser =  argparse.ArgumentParser(
        description = 'Arrage the 101 data set')
    parser.add_argument(
        'train_percent',type = float, default = 0,
        help= "the percent of the training data")
    args = parser.parse_args()


    img_num = 0       # running count of image lines written to img_list.txt
    class_lable = 0   # integer label assigned to the current class folder
    # Both output files are created in the current working directory.
    img_class = open('image_class.txt','w')
    img_list = open('img_list.txt','wb')
    # img_test = open('test.txt','w')
    dirs = os.listdir(img_dirname)
    # Rename every class folder on disk to its lower-case form.
    # NOTE(review): `dirs` keeps the names from before the rename, so on a
    # case-sensitive filesystem the second loop below would look up folders
    # under their old names -- verify.
    for folder in dirs:
        name = os.path.join(img_dirname, folder)
        new_name = os.path.join(img_dirname, folder.lower())
        if new_name != name:
            os.rename(name,new_name)

    dirs.sort()
    for folder in dirs:
        # One "<folder> <label>" line per class.
        img_class.write(folder + ' ' + str(class_lable) + os.linesep)
        temp = os.path.join(img_dirname,folder)
        for file in sorted(os.listdir(temp)):   
            # print file
            # One "<image path> <label>" line per image.
            img_list.write(os.path.join(temp,file)+' '+str(class_lable)+ os.linesep)
            img_num = img_num + 1
        class_lable = class_lable + 1


    # Total number of lines handed to img_list.write().
    print img_num

    # Number of images for the training split; only referenced by the
    # commented-out slicing further down.
    img_train = math.floor(args.train_percent * img_num)
    # NOTE(review): img_list is still open here and has not been flushed or
    # closed (close() only happens at the end of the script). The tail of the
    # written data may still sit in the write buffer, so read_csv can see
    # fewer rows than img_num. Call img_list.close() before this line.
    df = pd.read_csv('img_list.txt',sep = " ",  header = None)
    print df.shape  

    # df.columns = ["img_path", "img_class"]
    #df = df.iloc[np.random.permutation(img_num)]
    # Write the frame back out space-separated, without header or index.
    df.to_csv("randomlist.txt", header=None, index=None, sep=' ', mode='w')

    # df1 = df.iloc[:img_train]
    # df2 = df.iloc[img_train:]
    #print df1

    img_class.close()
    img_list.close()

在开始读取 'img_list.txt' 之前，请先调用 img_list.close()。

系统可能会把数据暂存在缓冲区中，直到您调用 close() 时才真正写入文件，而不是在您调用 write() 时就写入。因此，当您用 pandas 读取该文件时，仍有一部分数据尚未写入文件。

在您开始从 'img_list.txt' 读取之前，请先调用 img_list.close()。系统可能会把数据暂存在缓冲区中，直到调用 close() 时才写入文件，而不是在调用 write() 时。

谢谢！它起作用了。你的意思是，如果我不关闭它，img_list 的一些数据仍然在缓冲区中，所以当我使用 read_csv 时就读不到它们？

是的，一些数据仍然在缓冲区中，close() 会把它们写入文件。