如何使查询sql数据库的python脚本的内存效率更高?
我有一个python脚本,用于进行sql查询。问题是,我的虚拟机只有512MB的RAM,一些sql查询占用的RAM太多,因此内核会自动终止脚本。如何使此代码的RAM效率更高?一个想法是更积极地将数据写入磁盘,而不是将其累积到RAM中。有人知道这一点的简单实现吗?我将非常感谢你的帮助。代码如下。(标签:python, sql, python-3.x, memory-management, ram)
当您需要读取一个非常大的文件时,您可以逐行读取它,而不是将整个文件加载到RAM中。同样的逻辑也可以应用于此:你可以使用服务器端(命名)游标逐批获取结果,而不是一次性取回全部行;`cursor.fetchmany()` 也可能有帮助。
即使可以这样做,处理整个表也可能需要很多时间,这是一种权衡。您的SQL是否允许对查询加 LIMIT?如果允许,您可以将结果限制到某个可管理的数量,将其放入队列,处理完这一批再继续查看其他结果。@Gigaflop 我不确定它是否允许限制,我该如何确认呢?
from __future__ import print_function

# Standard-library imports (alphabetical).
import json
import pprint
import re
import sys
import time

# psycopg2 is the only third-party dependency; fail fast with a readable
# message and a non-zero exit code if it is missing.  NOTE: the original
# raised ImportError and then had an unreachable sys.exit(1) (with sys not
# yet imported); printing + exiting is what was clearly intended.
try:
    import psycopg2
except ImportError:
    print('\n\033[33mpsycopg2 library missing. pip install psycopg2\033[1;m\n',
          file=sys.stderr)
    sys.exit(1)
# NOTE(review): this module-level path is never used — connect_to_db()
# writes to the hard-coded "crtsh2.json" instead; confirm before removing.
outfilepath = "crtsh_output/crtsh_flat_file"
# Connection settings for the public crt.sh certificate-transparency
# PostgreSQL mirror (guest/read-only access, no password).
DB_HOST = 'crt.sh'
DB_NAME = 'certwatch'
DB_USER = 'guest'
# DELAY = 0  # commented-out throttle between queries; see time.sleep below
def connect_to_db():
    """Query crt.sh for the certificates of every domain in forager.txt,
    streaming each batch of rows straight to disk.

    Memory-efficiency fix: the original called ``cursor.fetchall()``, which
    materializes the *entire* result set in RAM and gets the process
    OOM-killed on a 512 MB VM.  Here a *named* (server-side) psycopg2 cursor
    is used instead, so rows stay on the PostgreSQL server and are pulled
    down ``batch_size`` at a time with ``fetchmany``; each batch is appended
    to the output file immediately, keeping peak RAM flat regardless of
    result-set size.

    Raises:
        Exception: any database error is re-raised after a short pause.
    """
    filepath = 'forager.txt'
    batch_size = 1000  # rows per server round trip; tune to available RAM
    with open(filepath) as fp:
        conn = None
        try:
            conn = psycopg2.connect(
                "dbname={0} user={1} host={2}".format(DB_NAME, DB_USER, DB_HOST))
            for cnt, domain_name in enumerate(fp):
                print("Line {}: {}".format(cnt, domain_name))
                print(domain_name)
                domain_name = domain_name.rstrip()
                # A cursor created with a *name* becomes a server-side
                # cursor: execute() does not transfer any rows until they
                # are fetched.  A named cursor is single-use, so create a
                # fresh one per query and close it when drained.
                cursor = conn.cursor(name='crtsh_stream')
                cursor.itersize = batch_size
                cursor.execute('''SELECT c.id, x509_commonName(c.certificate), x509_issuerName(c.certificate), x509_notBefore(c.certificate), x509_notAfter(c.certificate), x509_issuerName(c.certificate), x509_keyAlgorithm(c.certificate), x509_keySize(c.certificate), x509_publicKeyMD5(c.certificate), x509_publicKey(c.certificate), x509_rsaModulus(c.certificate), x509_serialNumber(c.certificate), x509_signatureHashAlgorithm(c.certificate), x509_signatureKeyAlgorithm(c.certificate), x509_subjectName(c.certificate), x509_name(c.certificate), x509_name_print(c.certificate), x509_commonName(c.certificate), x509_subjectKeyIdentifier(c.certificate), x509_extKeyUsages(c.certificate), x509_certPolicies(c.certificate), x509_canIssueCerts(c.certificate), x509_getPathLenConstraint(c.certificate), x509_altNames(c.certificate), x509_altNames_raw(c.certificate), x509_cRLDistributionPoints(c.certificate), x509_authorityInfoAccess(c.certificate), x509_print(c.certificate), x509_anyNamesWithNULs(c.certificate), x509_extensions(c.certificate), x509_tbscert_strip_ct_ext(c.certificate), x509_hasROCAFingerprint(c.certificate)
FROM certificate c, certificate_identity ci WHERE
c.id= ci.certificate_id AND ci.name_type = 'dNSName' AND lower(ci.name_value) =
lower(%s) AND x509_notAfter(c.certificate) > statement_timestamp()''', (domain_name,))
                outfilepath = "crtsh2" + ".json"
                # Open once per domain and append batch-by-batch so no more
                # than one batch of rows is ever held in memory.
                with open(outfilepath, 'a') as outfile:
                    while True:
                        rows = cursor.fetchmany(batch_size)
                        if not rows:
                            break
                        pprint.pprint(rows)
                        outfile.write(json.dumps(rows, sort_keys=True, indent=4,
                                                 default=str, ensure_ascii=False))
                cursor.close()
                # time.sleep(DELAY)  # optional per-query throttle
        except Exception as error:
            time.sleep(1)  # give the DB a bit to recover if you want
            raise error
        finally:
            if conn is not None:
                conn.close()  # the original leaked the connection
# Script entry point: run the crawl only when executed directly,
# not when this file is imported as a module.
if __name__ == "__main__":
    connect_to_db()