增加Python中的内存限制？_Python_Memory Management

增加Python中的内存限制？

python memory-management

增加Python中的内存限制？,python,memory-management,Python,Memory Management,我目前正在使用一个函数来生成非常长的字典（用于比较DNA字符串），有时我会得到MemoryError。有没有一种方法可以为Python分配更多的内存，以便它可以一次处理更多的数据？Python不会限制程序的内存使用。它将根据您的程序需要分配尽可能多的内存，直到您的计算机内存不足。你能做的最多就是把上限降低到一个固定的上限。这可以通过资源模块来完成，但它不是您想要的您需要考虑使您的代码对内存/性能更友好。如果您使用linux，您可以尝试-一种简单的方法来运行需要比机器上安装的内存更多的程序但

我目前正在使用一个函数来生成非常长的字典（用于比较DNA字符串），有时我会得到MemoryError。

有没有一种方法可以为Python分配更多的内存，以便它可以一次处理更多的数据？

Python不会限制程序的内存使用。它将根据您的程序需要分配尽可能多的内存，直到您的计算机内存不足。你能做的最多就是把上限降低到一个固定的上限。这可以通过

`resource`

模块来完成，但它不是您想要的

您需要考虑使您的代码对内存/性能更友好。

如果您使用Linux，您可以尝试使用交换空间（swap）来扩展可用内存——这是一种简单的方法，可以运行所需内存超过机器已安装内存的程序。

但是，更好的方法是更新程序，以便在可能的情况下分块处理数据，或者扩展计算机内存，因为使用这种方法（使用较慢的磁盘设备）会导致性能下降。

Python有MemoryError，它对应的是您系统RAM的限制，除非您已经使用

`resource` 包手动定义了它。
用 `__slots__`（插槽）定义类可以让Python解释器知道类的属性/成员是固定的，并且可以显著节省内存。
您可以通过使用 `__slots__`
减少Python解释器创建dict的次数。这将告诉解释器不要在内部为每个实例创建dict，并重用相同的变量。
另外，Python进程所消耗的内存可能会随时间持续增长。这似乎是以下因素的组合：

Python中的C内存分配器是如何工作的。这本质上是内存碎片，因为除非整个内存块都未被使用，否则分配器不能调用“free”。但是内存块的使用情况通常与您正在创建和使用的对象并不完全一致。
使用一些小字符串来比较数据。内部使用一个称为interning的过程，但创建多个小字符串会给解释器带来负载

最好的方法是创建工作线程或单线程池来完成工作，并在工作完成后使该工作线程失效/终止，以释放工作线程中附加/使用的资源。
下面的代码创建单线程工作线程：
# NOTE(review): a module-level `__slot__` tuple has no effect on memory use;
# Python only honours `__slots__` (plural) when it is assigned inside a class
# body. Left unchanged in case other code reads this name — confirm and remove.
__slot__ = ('dna1','dna2','lock','errorResultMap')
# Lock handed to every getDnaDict task; it is held while errorResultMap is updated.
lock = threading.Lock()
# Error records appended by getDnaDict when processing a key fails.
errorResultMap = []
def process_dna_compare(dna1, dna2):
    """Run getDnaDict over every key of ``dna1`` on a single worker thread.

    Args:
        dna1: Iterable of DNA keys; each key is submitted as one task.
        dna2: Accepted for interface compatibility; not used by this function.

    Raises:
        Exception: Anything a task raised is re-raised by ``future.result()``.
    """
    # max_workers=1 gives a pool with a single worker thread. Leaving the
    # ``with`` block waits for all submitted tasks and shuts the pool down,
    # which releases the resources held by the worker.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        futures = {executor.submit(getDnaDict, lock, dna_key): dna_key for dna_key in dna1}

    count = 0
    for future in concurrent.futures.as_completed(futures):
        result_dict = future.result()
        if result_dict:
            count += 1
            # Do your processing XYZ here
    logger.info('Total dna keys processed ' + str(count))

def getDnaDict(lock,dna_key):
    """Process one DNA key and return its data item, recording failures.

    Args:
        lock: Lock guarding ``errorResultMap``; held only while appending.
        dna_key: The key to process.

    Returns:
        The processed item, or ``None`` when processing failed and an error
        record was appended to ``errorResultMap`` instead.
    """
    try:
        # Placeholder from the example: the real processing of ``dna_key``
        # is expected to produce ``item``.
        # NOTE(review): ``item`` is not defined in the visible code — confirm.
        dataItem = item[0]
        return dataItem
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed; ordinary errors are
        # still handled best-effort.
        # ``with lock`` guarantees the lock is released even if building the
        # record raises, which manual acquire()/release() did not.
        # NOTE(review): ``dna_key_2`` / ``dna_key_3`` are not defined in the
        # visible code — confirm where they are supposed to come from.
        with lock:
            errorResultMap.append({'dna_key_1': '', 'dna_key_2': dna_key_2, 'dna_key_3': dna_key_3,
                              'dna_key_4': 'No data for dna found'})
        # str() so a non-string key cannot turn the log call into a TypeError.
        logger.error('Error in processing dna :' + str(dna_key))
        return None

if __name__ == "__main__":
    # Placeholder inputs; substitute the real DNA data sources here.
    dna1, dna2 = '''get data for dna1''', '''get data for dna2'''
    process_dna_compare(dna1, dna2)
    if errorResultMap:
        # print or write to file the errorResultMap
        pass

import objgraph
import random
import inspect

class Dna(object):
    """Minimal sample object carrying a single ``val`` attribute."""

    def __init__(self):
        """Create an instance whose ``val`` starts out as ``None``."""
        self.val = None

    def __str__(self):
        """Return the readable form ``dna – val: <val>``."""
        template = "dna – val: {0}"
        return template.format(self.val)

def f():
    """Build and return a list holding three fresh ``Dna`` instances."""
    return [Dna() for _ in range(3)]

def main():
    """Keep three Dna objects alive in a dict and dump objgraph reports.

    Prints the list size, asks objgraph for the most common object types,
    then requests a back-reference graph for one randomly chosen Dna instance
    (``dna_refs.png``) and a reference graph for the dict (``myDna-image.png``).
    """
    dna_list = f()
    holder = {'k': dna_list}
    print("list l has {0} objects of type Dna()".format(len(dna_list)))
    objgraph.show_most_common_types()

    sample = random.choice(objgraph.by_type('Dna'))
    objgraph.show_backrefs(sample, filename="dna_refs.png")
    objgraph.show_refs(holder, filename='myDna-image.png')


if __name__ == "__main__":
    main()

list l has 3 objects of type Dna()
function                   2021
wrapper_descriptor         1072
dict                       998
method_descriptor          778
builtin_function_or_method 759
tuple                      667
weakref                    577
getset_descriptor          396
member_descriptor          296
type                       180

下面的代码将帮助您了解内存使用情况：
# NOTE(review): a module-level `__slot__` tuple has no effect on memory use;
# Python only honours `__slots__` (plural) when it is assigned inside a class
# body. Left unchanged in case other code reads this name — confirm and remove.
__slot__ = ('dna1','dna2','lock','errorResultMap')
# Lock handed to every getDnaDict task; it is held while errorResultMap is updated.
lock = threading.Lock()
# Error records appended by getDnaDict when processing a key fails.
errorResultMap = []
def process_dna_compare(dna1, dna2):
    """Run getDnaDict over every key of ``dna1`` on a single worker thread.

    Args:
        dna1: Iterable of DNA keys; each key is submitted as one task.
        dna2: Accepted for interface compatibility; not used by this function.

    Raises:
        Exception: Anything a task raised is re-raised by ``future.result()``.
    """
    # max_workers=1 gives a pool with a single worker thread. Leaving the
    # ``with`` block waits for all submitted tasks and shuts the pool down,
    # which releases the resources held by the worker.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        futures = {executor.submit(getDnaDict, lock, dna_key): dna_key for dna_key in dna1}

    count = 0
    for future in concurrent.futures.as_completed(futures):
        result_dict = future.result()
        if result_dict:
            count += 1
            # Do your processing XYZ here
    logger.info('Total dna keys processed ' + str(count))

def getDnaDict(lock,dna_key):
    """Process one DNA key and return its data item, recording failures.

    Args:
        lock: Lock guarding ``errorResultMap``; held only while appending.
        dna_key: The key to process.

    Returns:
        The processed item, or ``None`` when processing failed and an error
        record was appended to ``errorResultMap`` instead.
    """
    try:
        # Placeholder from the example: the real processing of ``dna_key``
        # is expected to produce ``item``.
        # NOTE(review): ``item`` is not defined in the visible code — confirm.
        dataItem = item[0]
        return dataItem
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed; ordinary errors are
        # still handled best-effort.
        # ``with lock`` guarantees the lock is released even if building the
        # record raises, which manual acquire()/release() did not.
        # NOTE(review): ``dna_key_2`` / ``dna_key_3`` are not defined in the
        # visible code — confirm where they are supposed to come from.
        with lock:
            errorResultMap.append({'dna_key_1': '', 'dna_key_2': dna_key_2, 'dna_key_3': dna_key_3,
                              'dna_key_4': 'No data for dna found'})
        # str() so a non-string key cannot turn the log call into a TypeError.
        logger.error('Error in processing dna :' + str(dna_key))
        return None

if __name__ == "__main__":
    # Placeholder inputs; substitute the real DNA data sources here.
    dna1, dna2 = '''get data for dna1''', '''get data for dna2'''
    process_dna_compare(dna1, dna2)
    if errorResultMap:
        # print or write to file the errorResultMap
        pass

import objgraph
import random
import inspect

class Dna(object):
    """Minimal sample object carrying a single ``val`` attribute."""

    def __init__(self):
        """Create an instance whose ``val`` starts out as ``None``."""
        self.val = None

    def __str__(self):
        """Return the readable form ``dna – val: <val>``."""
        template = "dna – val: {0}"
        return template.format(self.val)

def f():
    """Build and return a list holding three fresh ``Dna`` instances."""
    return [Dna() for _ in range(3)]

def main():
    """Keep three Dna objects alive in a dict and dump objgraph reports.

    Prints the list size, asks objgraph for the most common object types,
    then requests a back-reference graph for one randomly chosen Dna instance
    (``dna_refs.png``) and a reference graph for the dict (``myDna-image.png``).
    """
    dna_list = f()
    holder = {'k': dna_list}
    print("list l has {0} objects of type Dna()".format(len(dna_list)))
    objgraph.show_most_common_types()

    sample = random.choice(objgraph.by_type('Dna'))
    objgraph.show_backrefs(sample, filename="dna_refs.png")
    objgraph.show_refs(holder, filename='myDna-image.png')


if __name__ == "__main__":
    main()

list l has 3 objects of type Dna()
function                   2021
wrapper_descriptor         1072
dict                       998
method_descriptor          778
builtin_function_or_method 759
tuple                      667
weakref                    577
getset_descriptor          396
member_descriptor          296
type                       180

内存使用的输出：
# NOTE(review): a module-level `__slot__` tuple has no effect on memory use;
# Python only honours `__slots__` (plural) when it is assigned inside a class
# body. Left unchanged in case other code reads this name — confirm and remove.
__slot__ = ('dna1','dna2','lock','errorResultMap')
# Lock handed to every getDnaDict task; it is held while errorResultMap is updated.
lock = threading.Lock()
# Error records appended by getDnaDict when processing a key fails.
errorResultMap = []
def process_dna_compare(dna1, dna2):
    """Run getDnaDict over every key of ``dna1`` on a single worker thread.

    Args:
        dna1: Iterable of DNA keys; each key is submitted as one task.
        dna2: Accepted for interface compatibility; not used by this function.

    Raises:
        Exception: Anything a task raised is re-raised by ``future.result()``.
    """
    # max_workers=1 gives a pool with a single worker thread. Leaving the
    # ``with`` block waits for all submitted tasks and shuts the pool down,
    # which releases the resources held by the worker.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        futures = {executor.submit(getDnaDict, lock, dna_key): dna_key for dna_key in dna1}

    count = 0
    for future in concurrent.futures.as_completed(futures):
        result_dict = future.result()
        if result_dict:
            count += 1
            # Do your processing XYZ here
    logger.info('Total dna keys processed ' + str(count))

def getDnaDict(lock,dna_key):
    """Process one DNA key and return its data item, recording failures.

    Args:
        lock: Lock guarding ``errorResultMap``; held only while appending.
        dna_key: The key to process.

    Returns:
        The processed item, or ``None`` when processing failed and an error
        record was appended to ``errorResultMap`` instead.
    """
    try:
        # Placeholder from the example: the real processing of ``dna_key``
        # is expected to produce ``item``.
        # NOTE(review): ``item`` is not defined in the visible code — confirm.
        dataItem = item[0]
        return dataItem
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed; ordinary errors are
        # still handled best-effort.
        # ``with lock`` guarantees the lock is released even if building the
        # record raises, which manual acquire()/release() did not.
        # NOTE(review): ``dna_key_2`` / ``dna_key_3`` are not defined in the
        # visible code — confirm where they are supposed to come from.
        with lock:
            errorResultMap.append({'dna_key_1': '', 'dna_key_2': dna_key_2, 'dna_key_3': dna_key_3,
                              'dna_key_4': 'No data for dna found'})
        # str() so a non-string key cannot turn the log call into a TypeError.
        logger.error('Error in processing dna :' + str(dna_key))
        return None

if __name__ == "__main__":
    # Placeholder inputs; substitute the real DNA data sources here.
    dna1, dna2 = '''get data for dna1''', '''get data for dna2'''
    process_dna_compare(dna1, dna2)
    if errorResultMap:
        # print or write to file the errorResultMap
        pass

import objgraph
import random
import inspect

class Dna(object):
    """Minimal sample object carrying a single ``val`` attribute."""

    def __init__(self):
        """Create an instance whose ``val`` starts out as ``None``."""
        self.val = None

    def __str__(self):
        """Return the readable form ``dna – val: <val>``."""
        template = "dna – val: {0}"
        return template.format(self.val)

def f():
    """Build and return a list holding three fresh ``Dna`` instances."""
    return [Dna() for _ in range(3)]

def main():
    """Keep three Dna objects alive in a dict and dump objgraph reports.

    Prints the list size, asks objgraph for the most common object types,
    then requests a back-reference graph for one randomly chosen Dna instance
    (``dna_refs.png``) and a reference graph for the dict (``myDna-image.png``).
    """
    dna_list = f()
    holder = {'k': dna_list}
    print("list l has {0} objects of type Dna()".format(len(dna_list)))
    objgraph.show_most_common_types()

    sample = random.choice(objgraph.by_type('Dna'))
    objgraph.show_backrefs(sample, filename="dna_refs.png")
    objgraph.show_refs(holder, filename='myDna-image.png')


if __name__ == "__main__":
    main()

list l has 3 objects of type Dna()
function                   2021
wrapper_descriptor         1072
dict                       998
method_descriptor          778
builtin_function_or_method 759
tuple                      667
weakref                    577
getset_descriptor          396
member_descriptor          296
type                       180

有关插槽的更多信息，请访问：
尝试将py从32位更新为64位
只需在命令行中键入python
，您就会看到您的python是什么。32位python的内存非常低。
python 64位有更多的内存支持。我会说实数，但我不记得了（我在StackOverflow问题中看到了这一点。）我正在比较长度为300-500万个字符的字符串，在这个过程中，为每个字符串创建一个字典，其中包含的键数与其长度大致相同。那算是很多吗？@Maor那肯定很多。你应该考虑重构你的代码。嘿，如果是DNA，那么这些字典怎么有这么多键？你用了多少RAM？你能在问题本身而不是评论中添加关于数据的详细信息吗？再详细一点。如果它是32位版本的Python，那么使用64位可能会让您受益匪浅。取决于。或者直到达到操作系统中的限制（例如，在linux上，您可以通过配置轻松施加限制）