Python 包含不同数据类型的dict的文件处理(I/O)
我想将dict存储在四种不同的文件类型中(Python 包含不同数据类型的dict的文件处理(I/O),python,json,csv,dictionary,file-io,Python,Json,Csv,Dictionary,File Io,我想将dict存储在四种不同的文件类型中(*.csv,*.json,*.txt,*.dat),其中包含多个str,一个str的列表,一个pd.Dataframe,np.ndarray,np.ndarrays的np.ndarrays和floats。我尝试了几种可能性,但都只起了部分作用。我无法将dict转换为pd.DataFrame,因为dict的条目具有不同的长度。否则,选择的方法将是pd.DataFrame.to_csv/pd.read_csv和pd.DataFrame.to_json/pd.
*.csv
,*.json
,*.txt
,*.dat
),其中包含多个str
,一个str
的列表,一个pd.Dataframe
,np.ndarray
,np.ndarray
s的np.ndarray
s和float
s。我尝试了几种可能性,但都只起了部分作用。我无法将dict
转换为pd.DataFrame
,因为dict的条目具有不同的长度。否则,选择的方法将是pd.DataFrame.to_csv
/pd.read_csv
和pd.DataFrame.to_json
/pd.DataFrame.read_json
读取文件意味着读取上一次程序运行的保存状态(数据将变成pd.DataFrame
)。在程序运行期间,随机值将被计算替换(或使用先前保存的配置文件中的值),因此不必担心np.random.rand
部分
注释掉的行是我在不同站点上发现的不同可能性(包括,如预期的)
写入文件
读取文件
你真正的问题是什么?如何将所述dict存储为csv
,json
。。。尽管不相容?或者哪种格式是合适的,比如说pickle
?如何优雅而可靠地将其存储为csv
…并再次读取,以便每个变量都转换为其原始数据类型。如果有一种方法可以保存“python程序运行”的整个状态,我会选择这种方法。但是我认为没有,所以我集中讨论了如何保存程序运行期间创建的所有变量。如果零件有pythonic方法,我会选择它们。我希望使用尽可能少的(额外)模块。是否可能(远)更容易存储值的原始数据类型,以便更容易重新转换…?CSV不适合您想要的。用泡菜。
import pandas as pd
import numpy as np
import csv
import os
import json
content = {
'rootpath' : os.path.abspath ( os.getcwd () ),
'indir' : '.',
'outdir' : 'output',
'workdir' : 'temporary',
'datafile' : 'file.csv',
'configfile' : 'configs.txt',
'savefile' : 'contents.csv',
'model' : np.arange (4),
'datafiles' : [os.path.join (os.getcwd (), 'data.csv'), os.path.join (os.getcwd (), 'data.dat')],
'data' : pd.DataFrame ( np.arange (15).reshape (3, 5) ),
'dataid' : 'g_g;r_g;r_r;',
'result' : None,
}
hmm = {
'hmm_a' : np.random.rand (9).reshape (3, 3), # test input of a two-dimensional (np.nd)array because one isn't enough and they will appear (during/after computation)
'hmm_b' : np.zeros (3),
'hmm_mu' : np.random.rand (1),
'hmm_pi' : np.random.rand (3),
'hmm_block' : np.random.rand (27).reshape (3, 3, 3), # test input of a three-dimensional (np.nd)array because of their appearance
}
'''
computation, changing pathname and filename, rest of the program
'''
write_in = content.copy ()
write_in.update (hmm)
path = os.path.join ( write_in ['rootpath'], write_in ['outdir'], write_in ['savefile'] )
p, filetype = os.path.splitext (path)
'''
if write_in == write_in ['data'] : # write_in only contains data
onlydata = True
else :
onlydata = False
'''
for c in write_in :
if type ( write_in [c] ) == np.ndarray :
write_in [c] = write_in [c].tolist ()
elif type ( write_in [c] ) == pd.DataFrame :
#write_in [c] = pd.DataFrame.to_numpy ( write_in [c], copy = True ).tolist () # needs pandas +0.24.0
write_in [c] = write_in [c].values.tolist ()
# saving as *.csv Comma Separated Values
if 'csv' in filetype or 'CSV' in filetype : # If chosen, take care when loading this file. Remember the data structure…!
if onlydata :
with open ( path, mode ) as f : # alternative 0; pd.DataFrame
write_in.to_csv ( f, header = None, index = False ) # alternative 0; pd.DataFrame
#write_in.to_csv ( path, header = None, index = False ) # alternative 1; pd.DataFrame
else : ### works.
# converting the chosen variables to linewise objects
for c in write_in :
if type ( write_in [c] ) == np.ndarray :
write_in [c] = write_in [c].tolist ()
elif type ( write_in [c] ) == pd.DataFrame :
#write_in [c] = write_in [c].to_numpy (copy = True).tolist () # alternative 2
#write_in [c] = DataFrame.to_numpy (write_in [c], copy = True).tolist () # alternative 1
write_in [c] = write_in [c].values.tolist () # alternative 0
# the saving itself
with open ( path, 'w', newline = '' ) as f :
w = csv.writer ( f, delimiter = ',', quotechar = '"' )#, quoting = csv.QUOTE_ALL ) # alternative 1; dict
for key, value in write_in.items () : # alternative 1; dict
w.writerow ( [key, value] ) # alternative 1; dict
# saving as *.json JavaScript Object Notation; the dict should be written into the file as onto the command line.
elif 'json' in filetype or 'JSON' in filetype :
if onlydata :
with open ( path, mode ) as f :
write_in.to_json (f)#, orient = 'columns' )#, orient = DataFrame )#, index = False ) # alternative 0; pd.DataFrame
else : ### works.
# converting the chosen variables to JSON serializable objects
for c in write_in :
if type ( write_in [c] ) == np.ndarray :
write_in [c] = write_in [c].tolist ()
elif type ( write_in [c] ) == pd.DataFrame :
write_in [c] = write_in [c].to_json ( orient = 'columns' )
# the saving itself
with open ( path, mode ) as f :
#f.write ( json.dumps (write_in) ) # alternative 1; dict
json.dump ( write_in, f, indent = 4 ) # alternative 0; dict
# saving as *.txt
elif 'txt' in filetype or 'TXT' in filetype :
if onlydata :
with open ( path, mode ) as f :
f.write ( str (write_in) ) ### Better make this with pd.iterrows
else :
with open ( path, mode ) as f :
f.write ( str (write_in) )
# saving as *.dat; this shouldn't make a difference for the file here as if saved as *.txt.
elif 'dat' in filetype or 'DAT' in filetype :
if onlydata :
with open ( path, mode = mode + 'b' ) as f :
f.write (write_in) ### Better make this with pd.iterrows…?
else :
with open ( path, mode = mode + 'b' ) as f :
f.write (write_in)
else :
print ( 'save_file: Unknown file format. Aborting program part.' )
# from *.csv
if 'csv' in filetype or 'CSV' in filetype :
read_out = {}
with open ( path, 'r' ) as f :
reader = csv.reader (f)
for k, v in reader :
read_out [k] = v
#for line in f : # if the above fails
#(key, val) = line.split (',') # if the above fails
#read_out [key] = val # if the above fails
#d = {} # if the above fails
#for key, val in read_out.iterrows () : # if the above fails
#d [key] = val # if the above fails
# converting the str the their original dtype, determined by what's in the str; better have put it in the file…?
for a in read_out :
if read_out [a] == '' :
if 'dir' in a : # Relative paths; set *indir*, *outdir* and *workdir* to *rootpath* if not specified.
read_out [a] = '.'
elif a == 'result' : # No result was achieved.
read_out [a] = None
elif a == 'dataid' :
if '[' == read_out [a] [0] and ']' == read_out [a] [-1] and "', '" in read_out [a] :
read_out [a] = read_out [a].split ("', '") [ 1 : -1 ] # Take away the enclosing brackets -> split up the long str to multiple short ones by the separating sequence of a str (list)
elif "', '" in read_out [a] :
read_out [a] = read_out [a].split ("', '")
elif '; ' in read_out [a] :
read_out [a] = read_out [a].split ('; ')
elif ';' in read_out [a] :
read_out [a] = read_out [a].split (';')
else :
seppi = input ( "read_configs: Couldn't determine the separating character of *dataid*. Please type it (Standard: comma): " )
if seppi == '' :
seppi = ','
if seppi in read_out [a] :
read_out [a] = read_out [a].split (seppi)
else :
read_out [a] = ['g_g', 'r_g', 'r_r']
elif a == 'datafiles' : # input list of data files which weren't taken into computation yet; absolute paths
read_out [a] = read_out [a].split ("'") [ 1 : -1 : 2 ]
elif '[' == read_out [a] [0] and ']' == read_out [a] [-1] : # Should be a np.ndarray because other entries with '[' and ']' are already sorted out.
if read_out [a].count ('[') == read_out [a].count (']') == 1 : # one-dimensional array
#floats = np.array ( read_out [a] [ 1 : -1 ].split (', ') [:] ).astype (np.float128) # alternative 1
#ints = np.array ( read_out [a] [ 1 : -1 ].split (', ') [:] ).astype (np.int64) # alternative 1
floats = np.fromstring ( read_out [a] [ 1 : -1 ], dtype = np.float128, sep = ',' ) # alternative 0
ints = np.fromstring ( read_out [a] [ 1 : -1 ], dtype = np.int64, sep = ',' ) # alternative 0
if ints.all () == floats.all () and not ints.all () == np.zeros (floats.size).all () :
read_out [a] = ints
else :
read_out [a] = floats
else : # multi-dimensional array; actually works only with two-dimensional arrays.
md_array = read_out [a] [ 2 : -2 ].split ('], [') # Removing '[[' from the beginning and ']]' from the end, splitting it up to a list where the elements are the inner np.ndarrays as str.
f, i = [], []
for b in range ( len (md_array) ) : # iterating over the length of *md_array*
#floats = np.array ( md_array [b].split (', ') ).astype (np.float128) # alternative 1
#ints = np.array ( md_array [b].split (', ') ).astype (np.int64) # alternative 1
floats = np.fromstring ( md_array [b], dtype = np.float128, sep = ',' ) # alternative 0
ints = np.fromstring ( md_array [b], dtype = np.int64, sep = ',' ) # alternative 0
f.append (floats)
i.append (ints)
floats = np.array (f)
ints = np.array (i)
if ints.all () == floats.all () and not ints.all () == np.zeros (floats.size).all () :
read_out [a] = ints
else :
read_out [a] = floats
# from *.json; the dict should be written into the file as onto the command line.
elif 'json' in filetype or 'JSON' in filetype :
with open ( path, 'r' ) as f : # alternative 0
read_out = json.load (f) # alternative 0
#f.read ( json.dumps (read_out) ) # alternative 1
# converting the str the their original dtype, determined by what's in the str; better have put it in the file…?
for a in read_out :
if a == 'result' or a == 'datafiles' or a == 'dataid' : # input list of data files which weren't taken into computation yet; absolute paths
pass
elif read_out [a] == '' and 'dir' in a : # Relative paths; set *indir*, *outdir* and *workdir* to *rootpath* if not specified.
read_out [a] = '.'
elif type ( read_out [a] ) == list :
#read_out [a] = pd.read_json ( read_out [a], numpy = True, precise_float = True )
read_out [a] = np.asarray ( read_out [a], dtype = np.float128 )
# from *.txt
elif 'txt' in filetype or 'TXT' in filetype :
with open ( path, 'r' ) as f :
reading = f.read ()
# from *.dat; this shouldn't make a difference for the file here as if saved as *.txt.
elif 'dat' in filetype or 'DAT' in filetype :
with open ( path, 'rb' ) as f :
f.read (read_out)
# Put the variables in the dicts *content* or *hmm*.
for a in read_out :
if 'hmm_' in a :
hmm [a] = read_out [a]
else :
content [a] = read_out [a]
if 'data' in content :
content ['data'] = pd.DataFrame ( content ['data'] )