用python从文本文件读取数据到postgres
标签:python, postgresql, file, psycopg2
我可以使用以下脚本,将根目录中测试文件 test.plt 的内容读入 postgres 表 tempo:
$cat test.plt
Geolife trajectory
WGS 84
Altitude is in Feet
Reserved 3
0,2,255,My Track,0,0,2,8421376
0
39.9756783,116.3308383,0,131.2,39717.4473148148,2008-09-26,10:44:08
39.9756649,116.3308749,0,131.2,39717.4473842593,2008-09-26,10:44:14
39.97564,116.3308749,0,131.2,39717.4474189815,2008-09-26,10:44:17
39.9756533,116.3308583,0,131.2,39717.4474537037,2008-09-26,10:44:20
39.9756316,116.3308299,0,131.2,39717.4474884259,2008-09-26,10:44:23
39.9753166,116.3306299,0,131.2,39717.4480324074,2008-09-26,10:45:10
39.9753566,116.3305916,0,131.2,39717.4480671296,2008-09-26,10:45:13
39.9753516,116.3305249,0,131.2,39717.4481018518,2008-09-26,10:45:16
Python脚本:
import psycopg2
from config import config
import os
import glob
# Parameterized INSERT: psycopg2 fills in the seven %s placeholders per row.
query = "INSERT INTO tempo (lat, lon, flag, alt, passeddate, gpsdate, gpstime) VALUES (%s, %s, %s, %s, %s, %s, %s)"
path = '~/Desktop/Data/'

conn = None
try:
    # Read the connection parameters.
    params = config()
    # Connect to the PostgreSQL server.
    conn = psycopg2.connect(**params)

    # The first six lines of the .plt file are header lines; data rows follow.
    with open('test.plt') as plt_file:
        data_lines = plt_file.readlines()[6:]
    # Skip blank lines (e.g. a trailing empty line): they would produce a
    # one-element row that does not match the seven placeholders.
    values = [line.strip().split(',') for line in data_lines if line.strip()]

    # Insert the data; the cursor is closed even if executemany raises.
    cur = conn.cursor()
    try:
        cur.executemany(query, values)
    finally:
        cur.close()
    # Commit the changes.
    conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
    print(error)
finally:
    if conn is not None:
        conn.close()
结果:
postgres=> SELECT * FROM tempo;
id | lat | lon | flag | alt | passeddate | gpsdate | gpstime
----+------------+-------------+------+-------+------------------+------------+----------
| 39.9756783 | 116.3308383 | 0 | 131.2 | 39717.4473148148 | 2008-09-26 | 10:44:08
| 39.9756649 | 116.3308749 | 0 | 131.2 | 39717.4473842593 | 2008-09-26 | 10:44:14
| 39.97564 | 116.3308749 | 0 | 131.2 | 39717.4474189815 | 2008-09-26 | 10:44:17
| 39.9756533 | 116.3308583 | 0 | 131.2 | 39717.4474537037 | 2008-09-26 | 10:44:20
| 39.9756316 | 116.3308299 | 0 | 131.2 | 39717.4474884259 | 2008-09-26 | 10:44:23
| 39.9753166 | 116.3306299 | 0 | 131.2 | 39717.4480324074 | 2008-09-26 | 10:45:10
| 39.9753566 | 116.3305916 | 0 | 131.2 | 39717.4480671296 | 2008-09-26 | 10:45:13
| 39.9753516 | 116.3305249 | 0 | 131.2 | 39717.4481018518 | 2008-09-26 | 10:45:16
(8 rows)
将python脚本中的insert语句替换为以下循环,可以列出所有.plt文件的文件名(不含扩展名):
# Print the name, without the ".plt" extension, of every .plt file found
# under `path`, including its sub-directories.
for root, dirs, files in os.walk(path):
    for file_name in files:
        if file_name.endswith(".plt"):
            # str.strip('.plt') removes any of the characters '.', 'p', 'l', 't'
            # from BOTH ends instead of removing the suffix, so split off the
            # extension explicitly.
            print(os.path.splitext(file_name)[0])
输出:
20081210001529
20081113121334
20081205143505
20081029234123
20081202145929
20081204142253
20081111234235
20081118003844
20081105110052
20081023055305
如您所见,文件以数字命名。目标是获取每个文件名,将其插入tempo表的id字段,然后将文件内容插入其余各列,并对所有子目录中的每个.plt文件重复此操作。
如何将文件名(例如20081210001529)添加到插入查询中(即将其插入到表中)?我目前的脚本如下,但运行时报错:
query = "INSERT INTO tempo (lat, lon, flag, alt, passeddate, gpsdate, gpstime) VALUES (%s, %s, %s, %s, %s, %s, %s)"
# os.walk does not expand "~", so resolve the home directory explicitly.
path = os.path.expanduser('~/Desktop/Data/')

# Establish connection to postgres
conn = None
try:
    # Read the connection parameters.
    params = config()
    # Connect to the PostgreSQL server.
    conn = psycopg2.connect(**params)
    cur = conn.cursor()
    try:
        for root, dirs, files in os.walk(path):
            for file_name in files:
                if not file_name.endswith(".plt"):
                    continue
                # os.walk yields bare file names; join with the directory being
                # walked, otherwise open() looks in the current working
                # directory and fails with "No such file or directory".
                with open(os.path.join(root, file_name), 'r') as plt_file:
                    # The first six lines are header lines; data rows follow.
                    content = plt_file.readlines()[6:]
                # Skip blank lines so every row has the seven expected fields.
                values = [line.strip().split(',') for line in content if line.strip()]
                # `query` is the INSERT statement defined just before this script.
                cur.executemany(query, values)
    finally:
        cur.close()
    # Commit the changes.
    conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
    print(error)
finally:
    if conn is not None:
        conn.close()
[Errno 2] No such file or directory: '20081210001529.plt'
非常感谢您在这项任务上的帮助。
变量file只保存文件名(不含目录),因此需要用os.path.join构造完整路径。
我只是将代码放在两个函数中(第一个用于查找所有plt文件,第二个用于插入每个文件的内容),这使得错误处理更加容易:
def findFiles(rootDir):
    """Return the full paths of all ``.plt`` files at or below *rootDir*.

    Args:
        rootDir: Directory whose tree is walked with ``os.walk``.

    Returns:
        A list of paths, each built by joining the containing directory with
        the file name. The list is empty when no matching file is found.
    """
    found = []
    for dir_path, _subdirs, file_names in os.walk(rootDir):
        # os.walk yields bare names, so prefix each with its directory.
        found.extend(
            os.path.join(dir_path, name)
            for name in file_names
            if name.endswith(".plt")
        )
    return found
def insert(pltFilePath):
    """Insert the data rows of one .plt file into the ``tempo`` table.

    The first six lines of the file are skipped as header lines; every
    remaining non-blank line is split on commas and inserted as one row.
    The connection parameters are read from the module-level ``params``.
    The ``id`` column is not part of the INSERT statement.

    Args:
        pltFilePath: Path of the .plt file to load.

    Returns:
        True if all rows were inserted and committed; False if reading the
        file or talking to the database failed (the error is printed).
    """
    query = "INSERT INTO tempo (lat, lon, flag, alt, passeddate, gpsdate, gpstime) VALUES (%s, %s, %s, %s, %s, %s, %s)"
    status = False
    conn = None
    try:
        # Read from the path that was passed in and close the file promptly.
        with open(pltFilePath, 'r') as plt_file:
            content = plt_file.readlines()[6:]
        # Skip blank lines so every row has the seven expected fields.
        values = [line.strip().split(',') for line in content if line.strip()]

        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        try:
            cur.executemany(query, values)
        finally:
            cur.close()
        # Without an explicit commit the inserts are discarded when the
        # connection is closed.
        conn.commit()
        status = True
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    return status
# Connection parameters that insert() reads from module scope.
params = config()

path = '/tmp/data'
pltFiles = findFiles(path)
if not pltFiles:
    print("No plt files found!")
for pltFile in pltFiles:
    print("Processing: %s" % pltFile)
    res = insert(pltFile)
    # res holds True or False, so you could rename, move or delete the file:
    # if res:
    #     os.remove(pltFile)