Python 如何释放numpy处理的内存
输出:Python 如何释放numpy处理的内存,python,numpy,memory,Python,Numpy,Memory,输出: # encoding: utf-8 import sys import commands import time import gc import numpy process=sys.argv[0] def get_use_memory(): global process return commands.getstatusoutput('ps aux | grep "{0}" | grep -v "grep"'.format(process)) def norm
# encoding: utf-8
import sys
import commands
import time
import gc
import numpy
process=sys.argv[0]
def get_use_memory():
    """Return the ``ps aux`` entry for this script as ``(status, output)``.

    The process list is filtered by the script name stored in the
    module-level ``process`` (``sys.argv[0]``); the ``grep`` helper itself is
    excluded from the result.  The memory columns of the returned line are
    what the experiment compares between checkpoints.
    """
    # `process` is only read here, so no `global` declaration is required.
    return commands.getstatusoutput(
        'ps aux | grep "{0}" | grep -v "grep"'.format(process))
def normalize_feature(node, delete_list):
print 'normalize_feature_step1', get_use_memory()
normal_features = []
for i in range(0, node.shape[0]):
feature_numpy = node[i, :]
feature_numpy_d = numpy.delete(feature_numpy, delete_list, axis=0)
normal_features.append(feature_numpy_d)
del feature_numpy
del feature_numpy_d
print 'normalize_feature_step2', get_use_memory()
np_normal_features = numpy.array(normal_features)
print sys.getsizeof(np_normal_features) / float(1024) / float(1024)
print 'normalize_feature_step3', get_use_memory()
del normal_features
gc.collect()
print 'normalize_feature_step4', get_use_memory()
return np_normal_features
#gc.set_debug(gc.DEBUG_STATS|gc.DEBUG_LEAK)
rows=1024
columns=10240
a = []
for i in range(0, rows):
b = []
for j in range(0, columns):
b.append(float(i) * j)
a.append(b)
del b
print get_use_memory()
node_1 = numpy.array(a)
print sys.getsizeof(node_1) / float(1024) / float(1024)
print get_use_memory()
del a
gc.collect()
print get_use_memory()
node_2 = normalize_feature(node_1, [0, 100, 1000])
print sys.getsizeof(node_2) / float(1024) / float(1024)
print get_use_memory()
del node_1
del node_2
gc.collect()
print get_use_memory()
(0, 'wangye 5319 96.5 1.0 581036 360528 pts/28 S+ 11:23 0:03 python test.py')
80.0001068115
(0, 'wangye 5319 106 1.3 662964 442456 pts/28 S+ 11:23 0:04 python test.py')
(0, 'wangye 5319 112 0.2 316812 98072 pts/28 S+ 11:23 0:04 python test.py')
normalize_feature_step1 (0, 'wangye 5319 112 0.2 316812 98072 pts/28 S+ 11:23 0:04 python test.py')
normalize_feature_step2 (0, 'wangye 5319 115 0.5 398372 179704 pts/28 S+ 11:23 0:04 python test.py')
79.9766693115
normalize_feature_step3 (0, 'wangye 5319 116 0.7 480272 261596 pts/28 S+ 11:23 0:04 python test.py')
normalize_feature_step4 (0, 'wangye 5319 116 0.5 398688 180148 pts/28 S+ 11:23 0:04 python test.py')
79.9766693115
(0, 'wangye 5319 116 0.5 398688 180148 pts/28 S+ 11:23 0:04 python test.py')
(0, 'wangye 5319 117 0.0 234864 16324 pts/28 S+ 11:23 0:04 python test.py')
在 normalize_feature_step3 和 normalize_feature_step4 之间释放了约 80M 内存,因为 del normal_features 释放了列表中的元素(即 numpy.ndarray)。最终的内存占用只有 16M。
但当我将代码的第38行和第39行更改为:
rows=10240
columns=1024
输出:
# encoding: utf-8
import sys
import commands
import time
import gc
import numpy
process=sys.argv[0]
def get_use_memory():
    """Return the ``ps aux`` entry for this script as ``(status, output)``.

    The process list is filtered by the script name stored in the
    module-level ``process`` (``sys.argv[0]``); the ``grep`` helper itself is
    excluded from the result.  The memory columns of the returned line are
    what the experiment compares between checkpoints.
    """
    # `process` is only read here, so no `global` declaration is required.
    return commands.getstatusoutput(
        'ps aux | grep "{0}" | grep -v "grep"'.format(process))
def normalize_feature(node, delete_list):
print 'normalize_feature_step1', get_use_memory()
normal_features = []
for i in range(0, node.shape[0]):
feature_numpy = node[i, :]
feature_numpy_d = numpy.delete(feature_numpy, delete_list, axis=0)
normal_features.append(feature_numpy_d)
del feature_numpy
del feature_numpy_d
print 'normalize_feature_step2', get_use_memory()
np_normal_features = numpy.array(normal_features)
print sys.getsizeof(np_normal_features) / float(1024) / float(1024)
print 'normalize_feature_step3', get_use_memory()
del normal_features
gc.collect()
print 'normalize_feature_step4', get_use_memory()
return np_normal_features
#gc.set_debug(gc.DEBUG_STATS|gc.DEBUG_LEAK)
rows=1024
columns=10240
a = []
for i in range(0, rows):
b = []
for j in range(0, columns):
b.append(float(i) * j)
a.append(b)
del b
print get_use_memory()
node_1 = numpy.array(a)
print sys.getsizeof(node_1) / float(1024) / float(1024)
print get_use_memory()
del a
gc.collect()
print get_use_memory()
node_2 = normalize_feature(node_1, [0, 100, 1000])
print sys.getsizeof(node_2) / float(1024) / float(1024)
print get_use_memory()
del node_1
del node_2
gc.collect()
print get_use_memory()
(0, 'wangye 5319 96.5 1.0 581036 360528 pts/28 S+ 11:23 0:03 python test.py')
80.0001068115
(0, 'wangye 5319 106 1.3 662964 442456 pts/28 S+ 11:23 0:04 python test.py')
(0, 'wangye 5319 112 0.2 316812 98072 pts/28 S+ 11:23 0:04 python test.py')
normalize_feature_step1 (0, 'wangye 5319 112 0.2 316812 98072 pts/28 S+ 11:23 0:04 python test.py')
normalize_feature_step2 (0, 'wangye 5319 115 0.5 398372 179704 pts/28 S+ 11:23 0:04 python test.py')
79.9766693115
normalize_feature_step3 (0, 'wangye 5319 116 0.7 480272 261596 pts/28 S+ 11:23 0:04 python test.py')
normalize_feature_step4 (0, 'wangye 5319 116 0.5 398688 180148 pts/28 S+ 11:23 0:04 python test.py')
79.9766693115
(0, 'wangye 5319 116 0.5 398688 180148 pts/28 S+ 11:23 0:04 python test.py')
(0, 'wangye 5319 117 0.0 234864 16324 pts/28 S+ 11:23 0:04 python test.py')
在 normalize_feature_step3 和 normalize_feature_step4 之间,内存几乎没有任何变化,最终的内存占用是 98M。
所以我猜可能是 numpy 持有了一部分内存。我想知道如何释放这些内存。
谢谢
调用 gc.collect() 并不意味着内存一定会被回收。它只是请求 gc 进行回收,是否回收由 gc 自己决定,因此无法直接强制 gc 回收。这不是 numpy 的问题,而是所有带 gc 的语言的普遍问题。另外,顺便指出您正在犯的一个错误:
(0, 'wangye 5400 99.5 1.1 604944 385888 pts/28 S+ 11:25 0:03 python test.py')
80.0001068115
(0, 'wangye 5400 109 1.4 686872 467892 pts/28 S+ 11:25 0:04 python test.py')
(0, 'wangye 5400 116 0.2 317024 98176 pts/28 S+ 11:25 0:04 python test.py')
normalize_feature_step1 (0, 'wangye 5400 116 0.2 317024 98176 pts/28 S+ 11:25 0:04 python test.py')
normalize_feature_step2 (0, 'wangye 5400 100 0.5 399592 180852 pts/28 S+ 11:25 0:05 python test.py')
79.7657318115
normalize_feature_step3 (0, 'wangye 5400 101 0.8 481276 262576 pts/28 S+ 11:25 0:05 python test.py')
normalize_feature_step4 (0, 'wangye 5400 101 0.7 480444 261904 pts/28 S+ 11:25 0:05 python test.py')
79.7657318115
(0, 'wangye 5400 101 0.7 480444 261904 pts/28 S+ 11:25 0:05 python test.py')
(0, 'wangye 5400 101 0.2 316836 98296 pts/28 S+ 11:25 0:05 python test.py')
向列表中添加元素不会复制该元素,只是把指向该元素的引用添加到列表中(可以用 is 关键字检查两者是否为同一个对象)。
因此,在将 a 添加到列表后再执行 del a 时,您并没有释放 a 的内存,只是删除了名为 a 的这个引用。也就是说,名为 a 的变量将不再存在,但其内容仍保留在内存中,由 b[0] 指向。
简而言之,python 中的 del 语句并不等同于 C/C++ 中的 free。如果您很在意内存,那么要么根本不应该使用 python,要么应该更加小心地编写代码;但依赖 del 并不可行,del 不应被用于这种目的。
例如,当您只是把两个 numpy 数组相加时,会动态创建相同大小的临时数组来存储中间结果,从而消耗比实际所需更多的内存。
numpy 和 python 通常都是用于快速原型开发的语言/库。如果您需要完全控制所使用的资源,那么应该使用 C/C++(或其他编译型语言)。任何解释型语言都会在内存不再被引用时自动释放它,但这不会是即时的,您不能依赖这一点。调用 gc.collect() 是什么意思?我明白了。非常感谢。