递归dir()一个python对象,用于查找特定类型或具有特定值的值
我有一个复杂的Python数据结构(如果重要的话,它是一个大的music21 Score对象),由于对象结构深处存在一个weakref,所以它无法被pickle。我以前用堆栈跟踪和python调试器调试过这样的问题,但这总是一件非常痛苦的事。是否有一个工具可以对对象的所有属性递归运行dir(),查找隐藏在列表、元组、dict等中的对象,并返回与某个条件匹配的对象(条件可以是lambda函数或类似的函数)?一个大问题是递归引用,因此需要某种备忘录(memo)机制(如copy.deepcopy所使用的)。我试过:递归dir()一个python对象,用于查找特定类型或具有特定值的值,python,recursive-descent,recursive-datastructures,Python,Recursive Descent,Recursive Datastructures,我有一个复杂的Python数据结构(如果重要的话,它是一个大的music21 Score对象),由于对象结构深处存在一个weakref,所以它无法被pickle。我以前用堆栈跟踪和python调试器调试过这样的问题,但这总是一件非常痛苦的事。是否有一个工具可以对对象的所有属性递归运行dir(),查找隐藏在列表、元组、dict等中的对象,并返回与某个条件匹配的对象(条件可以是lambda函数或类似的函数)?一个大问题是递归引用,因此需要某种备忘录(memo)机制(如copy.deepcopy所使用的)。我试过:
import weakref
def findWeakRef(streamObj, memo=None):
    """
    Recursively search the attributes of `streamObj` for weak references.

    Every public (non-dunder) attribute reported by ``dir()`` is inspected.
    Attribute values that are iterable have their elements inspected (for
    dicts: keys and values); other values are searched recursively through
    their own attributes.

    Parameters:
        streamObj: the object whose attribute tree is searched.
        memo: optional dict mapping ``id(obj)`` to ``obj`` for everything
            already visited.  It is shared across the recursion so that
            cyclic references terminate; callers normally leave it as None.

    Returns:
        A list of ``(attributeName, weakRef, owner)`` tuples, where `owner`
        is the object on which `attributeName` was looked up.  For a weakref
        stored inside a container, `attributeName` is the attribute holding
        that container.

    Known limitation: containers nested directly inside other containers
    are searched through their attributes only, not their elements.
    """
    # Plain scalars cannot hold a weakref.  Descending into them is wasted
    # work and can recurse without end, because attributes such as
    # ``float.imag`` hand back a fresh object on every access.
    leafTypes = (str, bytes, int, float, complex, bool, type(None))
    weakRefList = []
    if memo is None:
        memo = {}
    # The memo keeps a reference to each visited object (not just True) so
    # that the id of a short-lived object cannot be reused by a later one.
    memo.setdefault(id(streamObj), streamObj)
    if isinstance(streamObj, leafTypes):
        return weakRefList
    for x in dir(streamObj):
        # Dunder attributes lead into the type machinery (__class__,
        # __call__.__call__..., ...) rather than into the object's data.
        if x.startswith("__") and x.endswith("__"):
            continue
        try:
            xValue = getattr(streamObj, x)
        except Exception:
            # A property that cannot be evaluated should not abort the walk.
            continue
        if id(xValue) in memo:
            continue
        memo[id(xValue)] = xValue
        if isinstance(xValue, weakref.ref):
            weakRefList.append((x, xValue, streamObj))
            continue
        if isinstance(xValue, leafTypes):
            continue
        children = None
        if isinstance(xValue, dict):
            # Iterating a dict only gives its keys; the values matter too.
            children = list(xValue) + list(xValue.values())
        elif hasattr(xValue, "__iter__"):
            try:
                children = iter(xValue)
            except TypeError:
                # e.g. a class object that merely defines __iter__
                children = None
        if children is None:
            weakRefList.extend(findWeakRef(xValue, memo))
            continue
        for i in children:
            if id(i) in memo:
                continue
            memo[id(i)] = i
            if isinstance(i, weakref.ref):
                weakRefList.append((x, i, streamObj))
            else:
                weakRefList.extend(findWeakRef(i, memo))
    return weakRefList
我可能会继续填补这方面的漏洞(例如,对dict来说,直接迭代(__iter__)得到的只是键,并不是我想要的),但在我投入更多时间之前,我想知道是否有人知道一个更简单的答案。这可能是一个非常有用的通用工具。这似乎是答案的开始。为了让它能够工作,我不得不从Python 3.2向后移植了一些东西,这样它就不会去调用那些只会不断生成新对象的属性。下面是我想到的代码:
#-------------------------------------------------------------------------------
# Name: treeYield.py
# Purpose: traverse a complex datastructure and yield elements
# that fit a given criteria
#
# Authors: Michael Scott Cuthbert
#
# Copyright: Copyright © 2012 Michael Scott Cuthbert
# License: CC-BY
#-------------------------------------------------------------------------------
import types
class TreeYielder(object):
    '''
    Depth-first walker over an arbitrary object graph that yields every
    sub-object accepted by a user-supplied predicate.

    While a value is being yielded, `currentLevel()` returns a string
    describing how that value was reached from the root object.
    '''
    # `unicode` and `long` only exist on Python 2; fall back to their
    # Python 3 counterparts so the class can be used on either version.
    try:
        _textType = unicode  # noqa: F821
        _longType = long  # noqa: F821
    except NameError:
        _textType = str
        _longType = int

    def __init__(self, yieldValue = None):
        '''
        `yieldValue` should be a lambda function that
        returns True/False or a function/method call that
        will be passed the value of a current attribute.
        It must be callable by the time `run` is used.
        '''
        # containers/objects currently being traversed, outermost first
        self.currentStack = []
        self.yieldValue = yieldValue
        # (kind, key) steps from the root to the value being visited;
        # kind is 'dict', 'listLike' or 'getattr'
        self.stackVals = []
        # exact types that are treated as leaves and never descended into
        self.nonIterables = [int, str, bytes, self._textType, self._longType,
                             float, type(None), bool]

    def run(self, obj, memo = None):
        '''
        Traverse all attributes of an object looking
        for subObjects that meet a certain criteria.
        Yield them.

        `memo` is a dictionary, keyed by id(), counting how often each
        object has been encountered.  Containers and objects are only
        traversed the first time they are seen, which is what stops
        recursive references from looping forever.

        The original object is added to the memo and
        also checked for yieldValue.

        Raises Exception (with the current path) if the traversal of an
        object attribute fails with a RuntimeError such as hitting the
        recursion limit.
        '''
        if memo is None:
            memo = {}
        self.memo = memo
        tObj = type(obj)
        isLeaf = tObj in self.nonIterables
        objId = id(obj)
        if objId in self.memo:
            self.memo[objId] += 1
            # Leaves cannot form cycles, and the interpreter may share one
            # object (e.g. a small int) between unrelated attributes, so a
            # repeated leaf is still tested instead of being skipped.
            if not isLeaf:
                return
        else:
            self.memo[objId] = 1
        if self.yieldValue(obj) is True:
            yield obj
        if isLeaf:
            return
        ### now check for sub values...
        self.currentStack.append(obj)
        if tObj is dict:
            for keyX in obj:
                self.stackVals.append(('dict', keyX))
                for z in self.run(obj[keyX], memo=memo):
                    yield z
                self.stackVals.pop()
        elif tObj in (list, tuple):
            for i, x in enumerate(obj):
                self.stackVals.append(('listLike', i))
                for z in self.run(x, memo=memo):
                    yield z
                self.stackVals.pop()
        else: # objects or uncaught types...
            ### from http://bugs.python.org/file18699/static.py
            try:
                instance_dict = object.__getattribute__(obj, "__dict__")
            except AttributeError:
                ## probably uncaught static object: nothing to descend
                ## into, but still fall through so currentStack is popped
                instance_dict = ()
            for x in instance_dict:
                try:
                    gotValue = object.__getattribute__(obj, x)
                except Exception: # property that relies on something else being set.
                    continue
                self.stackVals.append(('getattr', x))
                try:
                    for z in self.run(gotValue, memo=memo):
                        yield z
                except RuntimeError:
                    # stackVals is deliberately left untouched here so the
                    # message shows the full path to the failure
                    raise Exception("Maximum recursion on:\n%s" % self.currentLevel())
                self.stackVals.pop()
        self.currentStack.pop()

    def currentLevel(self):
        '''
        Return the access path (e.g. "[4]['mock'].__getattribute__('x')")
        from the root object to the value currently being visited.

        Raises Exception if `stackVals` contains an unknown step kind.
        '''
        pieces = []
        for stackType, stackValue in self.stackVals:
            if stackType == 'dict':
                if isinstance(stackValue, str):
                    pieces.append("['" + stackValue + "']")
                elif isinstance(stackValue, self._textType):
                    pieces.append("[u'" + stackValue + "']")
                else: # numeric key...
                    pieces.append("[" + str(stackValue) + "]")
            elif stackType == 'listLike':
                pieces.append("[" + str(stackValue) + "]")
            elif stackType == 'getattr':
                pieces.append(".__getattribute__('" + stackValue + "')")
            else:
                raise Exception("Cannot get attribute of type %s" % stackType)
        return "".join(pieces)
此代码允许您运行如下内容:
class Mock(object):
    """
    Small nested fixture used to demonstrate the traversal.

    Each instance stores an int, the given `mockThing`, a list that
    references `mockThing` twice, and - when `embedMock` is exactly True -
    one further Mock built around the same `mockThing` (which itself has
    no nested Mock).
    """
    def __init__(self, mockThing, embedMock = True):
        self.abby = 30
        self.mocker = mockThing
        self.mockList = [mockThing] * 2 + [40]
        # Only the literal True triggers nesting; the child is created with
        # embedMock=False so the chain stops after one level.
        self.embeddedMock = (
            Mock(mockThing, embedMock = False) if embedMock is True else None
        )
# Predicate handed to TreeYielder: accepts any object whose class is
# named 'Mock'.
def mockType(x):
    return x.__class__.__name__ == 'Mock'

# The same list object is given to both Mock instances below.
subList = [100, 60, -2]
myList = [
    5,
    20,
    [5, 12, 17],
    30,
    {'hello': 10, 'goodbye': 22, 'mock': Mock(subList)},
    -20,
    Mock(subList),
]
# Make the structure self-referential so the traversal has a cycle to
# cope with.
myList.append(myList)

ty = TreeYielder(mockType)
matches = ty.run(myList)
for val in matches:
    # currentLevel() is read while the generator is suspended, so it
    # describes the path to `val`.
    print(val, ty.currentLevel())
并获得:
(<__main__.Mock object at 0x01DEBD10>, "[4]['mock']")
(<__main__.Mock object at 0x01DEF370>, "[4]['mock'].__getattribute__('embeddedMock')")
(<__main__.Mock object at 0x01DEF390>, '[6]')
(<__main__.Mock object at 0x01DEF3B0>, "[6].__getattribute__('embeddedMock')")
(20, '[1]')
(12, '[2][1]')
(17, '[2][2]')
(30, '[3]')
(22, "[4]['goodbye']")
(100, "[4]['mock'].__getattribute__('embeddedMock').__getattribute__('mocker')[0]")
(60, "[4]['mock'].__getattribute__('embeddedMock').__getattribute__('mocker')[1]")
(40, "[4]['mock'].__getattribute__('embeddedMock').__getattribute__('mockList')[2]")
并获得:
(<__main__.Mock object at 0x01DEBD10>, "[4]['mock']")
(<__main__.Mock object at 0x01DEF370>, "[4]['mock'].__getattribute__('embeddedMock')")
(<__main__.Mock object at 0x01DEF390>, '[6]')
(<__main__.Mock object at 0x01DEF3B0>, "[6].__getattribute__('embeddedMock')")
(20, '[1]')
(12, '[2][1]')
(17, '[2][2]')
(30, '[3]')
(22, "[4]['goodbye']")
(100, "[4]['mock'].__getattribute__('embeddedMock').__getattribute__('mocker')[0]")
(60, "[4]['mock'].__getattribute__('embeddedMock').__getattribute__('mocker')[1]")
(40, "[4]['mock'].__getattribute__('embeddedMock').__getattribute__('mockList')[2]")
我仍在试图找出为什么找不到abby,但我认为即使在这一点上,它也值得发布,因为它比我开始时的版本正确得多。这里有一个更简单、也有点幼稚的解决方案:仅沿属性树进行深度优先搜索。如果遇到原语(基本类型),则停止,否则继续深入树中。它会给出调用路径以及叶子节点的值。
def recursive_dir(obj, path, _ancestors=None):
    """
    Depth-first walk over the attribute tree of `obj`, printing every leaf.

    A leaf is None, an instance of str/float/int/list/dict/set, any object
    without an instance ``__dict__``, or an object that is already being
    expanded further up the current path (a cyclic reference).  For each
    leaf the attribute path and the value are printed, followed by an
    empty line.

    Parameters:
        obj: the object to inspect.
        path: list of attribute names leading to `obj`; pass ``[]`` for
            the root.  The list is never modified.
        _ancestors: internal set of ids of the objects on the current
            recursion path; callers should leave it unset.

    Returns:
        None.  All output goes to standard output.
    """
    if _ancestors is None:
        _ancestors = set()
    attrs = None
    if (obj is not None
            and not isinstance(obj, (str, float, int, list, dict, set))
            and id(obj) not in _ancestors):
        # Objects without __dict__ (tuples, bytes, slotted classes, ...)
        # have nothing to descend into, so they are reported as leaves.
        attrs = getattr(obj, "__dict__", None)
    if attrs is None:
        print(path, "--->", obj)
        print("")
        return
    _ancestors.add(id(obj))
    try:
        # Snapshot the names so the walk is unaffected if an attribute
        # access happens to add entries to the instance dict.
        for attr in list(attrs):
            recursive_dir(getattr(obj, attr), path + [attr], _ancestors)
    finally:
        # Only ancestors count as a cycle; siblings may share objects.
        _ancestors.discard(id(obj))
# Example invocation: `x` is not defined in this snippet and stands for the
# object to inspect; the empty list is the starting attribute path.
recursive_dir(x,[])
我还没有看到现成的解决方案。也许用gc.get_referents代替dir会让你走得更远。它确实减少了干扰项(__eq__等),但代价是返回内容的格式会因对象的类型而改变,因此它可能是一个更快的解决方案,但并不更简单。它似乎也不支持递归。谢谢!你是否考虑过继承 pickle.Pickler 类?源代码包含在 ../Lib/pickle.py 中。这样做应该允许您重用大量代码,并通过捕获 PickleError 异常来执行您所描述的操作——同时还能利用Python已有的、完善的pickle协议。评论不错。我还需要这个脚本来做其他的事情,但这似乎是一个很好的方法。非常简单,不过对我来说,检查列表等可迭代对象的内部对于找到那些讨厌的weakref(弱引用)也很重要。