返回脚本中使用的导入Python模块的列表?

返回脚本中使用的导入Python模块的列表?,python,module,Python,Module,我正在编写一个程序,对Python文件列表进行分类,根据这些文件导入模块。因此,我需要扫描.py文件集合并返回它们导入的模块列表。例如,如果我导入的其中一个文件具有以下行: import os import sys, gtk 我希望它能返回: ["os", "sys", "gtk"] 我玩了modulefinder并写道: from modulefinder import ModuleFinder finder = ModuleFinder() finder.run_script('tes

我正在编写一个程序,对Python文件列表进行分类,根据这些文件导入模块。因此,我需要扫描.py文件集合并返回它们导入的模块列表。例如,如果我导入的其中一个文件具有以下行:

import os
import sys, gtk
我希望它能返回:

["os", "sys", "gtk"]
我玩了modulefinder并写道:

# Analyse a script with ModuleFinder and list every module it recorded.
# NOTE: finder.modules holds far more than the script's own imports (see the
# sample output that follows this snippet).
from modulefinder import ModuleFinder

finder = ModuleFinder()
finder.run_script('testscript.py')

print 'Loaded modules:'
# Python 2 syntax: the trailing comma keeps all names on one output line.
for name, mod in finder.modules.iteritems():
    print '%s ' % name,
但这返回的不仅仅是脚本中使用的模块。例如,脚本中仅包含:

import os
print os.getenv('USERNAME')
从ModuleFinder脚本返回的模块返回:

tokenize  heapq  __future__  copy_reg  sre_compile  _collections  cStringIO  _sre  functools  random  cPickle  __builtin__  subprocess  cmd  gc  __main__  operator  array  select  _heapq  _threading_local  abc  _bisect  posixpath  _random  os2emxpath  tempfile  errno  pprint  binascii  token  sre_constants  re  _abcoll  collections  ntpath  threading  opcode  _struct  _warnings  math  shlex  fcntl  genericpath  stat  string  warnings  UserDict  inspect  repr  struct  sys  pwd  imp  getopt  readline  copy  bdb  types  strop  _functools  keyword  thread  StringIO  bisect  pickle  signal  traceback  difflib  marshal  linecache  itertools  dummy_thread  posix  doctest  unittest  time  sre_parse  os  pdb  dis
…而我只希望它返回“os”,因为这是脚本中使用的模块

有人能帮我做到这一点吗


更新:我只想澄清一下,我希望在不运行正在分析的Python文件的情况下执行此操作,而只是扫描代码。

我认为最好的方法是使用 snakefood 包。作者已经完成了所有必要的工作:它不仅能找出直接导入的模块,还使用 AST 解析代码来查找运行时依赖项,而更静态的分析可能会遗漏这些依赖项

制作了一个命令示例来演示:

sfood ./example.py | sfood-cluster > example.deps
这将生成每个唯一模块的基本依赖项文件。要了解更多详细信息,请使用:

sfood -r -i ./example.py | sfood-cluster > example.deps
要遍历树并查找所有导入,还可以在代码中执行此操作: 请注意-此例行程序的AST块是从拥有此版权的snakefood来源中提取的:版权(C)2001-2007 Martin Blais。版权所有

 import os
 import compiler
 from compiler.ast import Discard, Const
 from compiler.visitor import ASTVisitor

 def pyfiles(startPath):
     """Collect every ``.py`` file found anywhere below *startPath*.

     Args:
         startPath: Directory to search; relative paths are made absolute.

     Returns:
         A list of ``[directory, filename]`` pairs, where ``directory`` is the
         absolute directory that actually contains ``filename``. The list is
         empty when *startPath* does not exist or is not a directory.
     """
     top = os.path.abspath(startPath)
     if not os.path.isdir(top):
         return []
     found = []
     for root, _dirs, files in os.walk(top):
         for fname in files:
             if os.path.splitext(fname)[1] == '.py':
                 # Pair the file with the directory being walked (root), not
                 # with the start directory: joining the start directory with
                 # a file that lives in a subdirectory gives a bogus path.
                 found.append([root, fname])
     return found

 class ImportVisitor(object):
     """Accumulate one record per imported name while an AST is walked.

     Records are first queued in ``recent`` as 5-tuples
     ``(module, imported_name, local_name, lineno, level)`` and are moved to
     ``modules`` as 6-tuples (the same fields plus ``pragma``) the next time
     any visit method or ``finalize`` runs.
     """

     def __init__(self):
         self.modules = []   # flushed 6-tuple records
         self.recent = []    # queued 5-tuple records awaiting a flush

     def visitImport(self, node):
         """Queue one record per name of an ``import a, b as c`` node."""
         self.accept_imports()
         for entry in node.names:
             name, alias = entry[0], entry[1]
             # The local name falls back to the module name without an alias.
             self.recent.append((name, None, alias or name, node.lineno, 0))

     def visitFrom(self, node):
         """Queue one record per name of a ``from m import ...`` node."""
         self.accept_imports()
         source = node.modname
         if source == '__future__':
             return # Ignore these.
         for name, as_ in node.names:
             if name == '*':
                 # We really don't know which names a star import binds.
                 imported, local = None, None
             else:
                 imported, local = name, as_ or name
             self.recent.append(
                 (source, imported, local, node.lineno, node.level))

     def default(self, node):
         """Flush queued records when any other node is reached.

         If records are queued and *node* is a ``Discard`` whose only child
         is a ``Const``, that constant's value is stored as the ``pragma``
         field of the flushed records; otherwise ``pragma`` is ``None``.
         """
         pragma = None
         if self.recent and isinstance(node, Discard):
             children = node.getChildren()
             if len(children) == 1 and isinstance(children[0], Const):
                 pragma = children[0].value
         self.accept_imports(pragma)

     def accept_imports(self, pragma=None):
         """Move every queued record to ``modules``, tagging it with *pragma*."""
         for m, r, l, n, lvl in self.recent:
             self.modules.append((m, r, l, n, lvl, pragma))
         self.recent = []

     def finalize(self):
         """Flush whatever is still queued and return the full record list."""
         self.accept_imports()
         return self.modules

 class ImportWalker(ASTVisitor):
     """ASTVisitor subclass that forwards each default-handled node to a visitor.

     The wrapped visitor only needs a ``default(node)`` method.
     """
     def __init__(self, visitor):
         ASTVisitor.__init__(self)
         # Visitor whose default() is notified for every node handled below.
         self._visitor = visitor
     def default(self, node, *args):
         # Notify the wrapped visitor first, then defer to the base class
         # (presumably that continues the traversal - confirm against the
         # Python 2 compiler.visitor documentation).
         self._visitor.default(node)
         ASTVisitor.default(self, node, *args) 

 def parse_python_source(fn):
     """Parse the Python 2 source file *fn* and return its import records.

     Args:
         fn: Path of the source file to analyse. The file is only parsed,
             never executed.

     Returns:
         The list produced by ``ImportVisitor.finalize()``.
     """
     # Read inside a with-block so the handle is closed even if read() fails;
     # the previous version never closed the file.
     with open(fn, 'rU') as source:
         contents = source.read()
     ast = compiler.parse(contents)
     vis = ImportVisitor()

     compiler.walk(ast, vis, ImportWalker(vis))
     return vis.finalize()

 # Python 2 driver: print each discovered file, then its import records.
 for d, f in pyfiles('/Users/bear/temp/foobar'):
     print d, f
     print parse_python_source(os.path.join(d, f)) 

嗯,您可以编写一个简单的脚本,在文件中搜索
import
语句。此文件将查找所有导入的模块和文件,包括在函数或类中导入的模块和文件:

def find_imports(toCheck):
    """
    Given a filename, return a list of modules imported by the program.

    Only plain ``import ...`` statements are examined (``from ... import``
    lines are ignored), and only modules that can be located on the current
    import path are included. The file is never executed, so import
    statements in if/else or try/except blocks are always included.

    Args:
        toCheck: Path of the Python source file to scan.

    Returns:
        Module names in order of appearance, written as in the source
        (aliases dropped). Duplicates are kept.
    """
    importedItems = []
    with open(toCheck, 'r') as pyFile:
        for line in pyFile:
            # Drop a trailing comment and anything after ';', then keep only
            # lines whose first word is "import".
            statement = line.partition("#")[0].partition(";")[0].split(None, 1)
            if len(statement) != 2 or statement[0] != "import":
                continue
            for clause in statement[1].split(","):
                # "name as alias" -> "name". Splitting on whitespace (rather
                # than on the substring "as") keeps names such as base64 or
                # ast intact and handles several aliased imports per line.
                words = clause.split()
                if not words:
                    continue
                imported = words[0]
                if _module_can_be_found(imported):
                    importedItems.append(imported)

    return importedItems


def _module_can_be_found(name):
    """Return True if the top-level package of *name* can be located.

    Only the top-level package is looked up, so nothing is imported.
    """
    top_level = name.partition(".")[0]
    if not top_level:
        return False
    try:
        from importlib.util import find_spec
    except ImportError:
        # Python 2: fall back to the legacy imp module.
        import imp
        try:
            handle = imp.find_module(top_level)[0]
        except ImportError:
            return False
        if handle is not None:
            # find_module() hands back an open file for source modules.
            handle.close()
        return True
    try:
        return find_spec(top_level) is not None
    except (ImportError, ValueError):
        # ValueError: the module is loaded but carries no usable __spec__.
        return False

# Python 2 entry point: ask for a file name and print the modules it imports.
toCheck = raw_input("Which file should be checked: ")
print find_imports(toCheck)
这对 `from module import something` 形式的导入没有任何作用,尽管可以很容易地添加,这取决于您想要如何处理这些内容。它也不做任何语法检查,因此,如果您写了一些奇怪的代码,比如 `import sys gtk,os`,它会认为您已经导入了所有三个模块,即使这行代码是错误的。它也不处理与导入相关的 `try`/`except` 语句——只要模块可以导入,此函数就会列出它。如果使用了 `as` 关键字,它也不能很好地处理每行的多个导入。这里真正的问题是,我必须编写一个完整的解析器才能真正正确地做到这一点。给定的代码在很多情况下都能工作,只要您知道存在一些明确的边界情况


一个问题是,如果此脚本与给定文件不在同一目录中,则相对导入将失败。您可能希望将给定脚本的目录添加到
sys.path

这取决于您希望的彻底程度。确定实际使用了哪些模块是一个图灵完备的问题:有些 Python 代码使用延迟导入,只导入它们在特定运行中实际使用的东西,有些则动态生成要导入的东西(例如插件系统)


`python -v` 将跟踪导入语句——这可以说是最简单的检查方法。

我正在编辑我的原始答案来说明这一点。这对于下面这样的代码片段是可行的,但是解析可能是最好的方法

def iter_imports(fd):
    """Yield the stripped lines of *fd* that look like import statements.

    *fd* may be any iterable of strings (an open file, a list of lines, ...).
    A line qualifies when, after stripping, it starts with ``'import '`` or
    starts with ``'from '`` and contains ``'import '``.
    """
    for raw in fd:
        candidate = raw.strip()
        is_plain = candidate.startswith('import ')
        is_from = candidate.startswith('from ') and 'import ' in candidate
        if is_plain or is_from:
            yield candidate

def main():
    """Print the import lines of a hard-coded file.

    Returns:
        0 on success, or 1 when the file cannot be opened or read (an error
        message is printed in that case).
    """
    # File name to read.
    filename = '/my/path/myfile.py'
    try:
        with open(filename) as f:
            # Materialise the generator while the file is still open.
            import_lines = list(iter_imports(f))
    except (IOError, OSError) as exIO:
        print('Error opening file: {}\n{}'.format(filename, exIO))
        return 1

    # import_lines now holds lines such as:
    #     from module import thing
    #     import os, sys
    #     from module import *
    # Do whatever you need to do with the import lines.
    print('\n'.join(import_lines))
    return 0

if __name__ == '__main__':
    # This snippet never imports sys, so import it here; without it the
    # sys.exit() call below fails with a NameError.
    import sys

    # Use main()'s return value as the process exit status.
    sys.exit(main())

需要进一步的字符串解析来获取模块名称。这不适用于多行字符串或文档字符串包含单词“import”或“from X import”的情况。这就是我建议解析AST的原因。

对于大多数只在顶层导入模块的脚本,将文件作为模块加载并扫描其成员以查找模块就足够了:

import sys,io,imp,types
# Load a script into a throw-away module and list the modules it bound at top level.
scriptname = 'myfile.py'
with io.open(scriptname) as scriptfile:
    # Text streams expose read(); readall() exists only on raw binary streams.
    code = compile(scriptfile.read(), scriptname, 'exec')
# types.ModuleType builds an empty module object; naming it '__main__' makes
# the script's ``if __name__ == '__main__'`` branch run as well.
newmodule = types.ModuleType('__main__')
# NOTE(review): this executes the script. Do not use it on untrusted files.
exec(code, newmodule.__dict__)
# Names in the new module's namespace that are bound to module objects.
scriptmodules = [name for name in dir(newmodule) if isinstance(newmodule.__dict__[name],types.ModuleType)]

通过将模块的名称设置为 `'__main__'`,可以模拟作为脚本运行的模块。因此,它还应该捕获时髦的动态模块加载。它不会捕获的唯一模块是那些只导入到本地作用域中的模块。

这是有效的-使用importlib实际导入模块,并检查以获取成员:

#! /usr/bin/env python
#
# test.py  
#
# Find Modules
#
import inspect, importlib as implib

if __name__ == "__main__":
    # Importing "example" executes its top-level code.
    mod = implib.import_module( "example" )
    # Each item is a (name, module) pair for an attribute that is a module.
    for i in inspect.getmembers(mod, inspect.ismodule ):
        print i[0]

#! /usr/bin/env python
#
# example.py
#
import sys 
from os import path

if __name__ == "__main__":
    # Runs only when example.py is executed directly (Python 2 print statement).
    print "Hello World !!!!"
输出:

tony@laptop .../~:$ ./test.py
path
sys

我正在寻找类似的东西,我在一个名为。扫描仪使用导入挂钩(7行)执行您想要的操作。以下是一个简化示例:

import modulefinder, sys

class SingleFileModuleFinder(modulefinder.ModuleFinder):
    """ModuleFinder that only follows imports written in one given script.

    Call the instance with the script path to run the analysis; the results
    are then available through the inherited ``modules`` mapping.
    """

    def import_hook(self, name, caller, *arg, **kwarg):
        """Forward the import to ModuleFinder only for the analysed script."""
        if caller.__file__ != self.name:
            # Import made by some other module: do not follow it.
            return None
        # Only call the parent at the top level.
        return modulefinder.ModuleFinder.import_hook(
            self, name, caller, *arg, **kwarg)

    def __call__(self, node):
        """Analyse the script whose path is ``str(node)``."""
        script_path = str(node)
        self.name = script_path
        self.run_script(script_path)

if __name__ == '__main__':
    # Example entry, run with './script.py filename'
    print 'looking for includes in %s' % sys.argv[1]

    mf = SingleFileModuleFinder()
    # Calling the finder runs the analysis on the given script path.
    mf(sys.argv[1])

    # One module name per line (Python 2 print statement).
    print '\n'.join(mf.modules.keys())
您可能想试试 dis 模块(双关语):

输出

defaultdict(<class 'list'>,
            {'IMPORT_FROM': ['absolute_import',
                             'division',
                             'open',
                             'check_output',
                             'Popen'],
             'IMPORT_NAME': ['__future__',
                             'os',
                             'collections',
                             'itertools',
                             'math',
                             'gzip',
                             'subprocess'],
             'IMPORT_STAR': [None]})
defaultdict(<class 'list'>,
            {'IMPORT_FROM': ['absolute_import',
                             'division',
                             'open',
                             'check_output',
                             'Popen'],
             'IMPORT_NAME': ['__future__',
                             'os',
                             'collections',
                             'itertools',
                             'math',
                             'gzip',
                             'subprocess'],
             'IMPORT_STAR': [None]})

您导入的模块是 `grouped['IMPORT_NAME']`

它实际上可以很好地使用

# Print the names returned by locals() that are bound to module objects
# (type(sys) is the module type), skipping names that start with '__'.
# Python 2 print statement.
print [key for key in locals().keys()
   if isinstance(locals()[key], type(sys)) and not key.startswith('__')]

感谢 Tony Suffolk 提供的 inspect、importlib 示例……我在此基础上构建了这个小模块,如果对您有帮助,欢迎大家使用。回馈社区,耶!

import timeit
import os
import inspect, importlib as implib
import textwrap as twrap

def src_modules(filename):
    """Import the module named by *filename* and describe the modules it holds.

    Args:
        filename: Module file name; everything from the first "." onward is
            dropped to obtain the module name, which is then imported.

    Returns:
        A list of ``"<quoted real name>:=<alias>"`` strings, one for each
        module-valued attribute of the imported module, in the order given
        by ``inspect.getmembers``.
    """
    assert (len(filename)>1)

    module_name = filename.split(".")[0]
    mod = implib.import_module(module_name)
    # str(module) looks like "<module 'os' from ...>"; the second
    # space-separated field is the quoted real module name.
    return [
        str(member).split(" ")[1] + ":=" + alias
        for alias, member in inspect.getmembers(mod, inspect.ismodule)
    ]

def l_to_str(itr):
    """Sort *itr* in place and join its strings, each followed by two spaces.

    Args:
        itr: Non-empty list of strings. It is sorted in place, so the
            caller's list is modified.

    Returns:
        The sorted items concatenated; every item, including the last one,
        is followed by two spaces.
    """
    assert(len(itr)>0)

    itr.sort()
    return "".join(entry + "  " for entry in itr)

def src_info(filename, start_time=timeit.default_timer()):
    """Print a boxed report of the modules used by the module *filename*.

    Args:
        filename: Module file name (for example ``"print_src_info.py"``).
        start_time: Timer reading the reported runtime is measured from. The
            default is evaluated once, when this function is defined.

    Returns:
        An empty string.
    """
    assert (len(filename)>1)

    source_name = filename
    stem = source_name.split(".")[0]

    # The original compared __name__ with the stem and picked one of the two,
    # which always yields this module's __name__.
    output_module = __name__

    banner = 80 * "#"
    print ("\n" + banner)
    elapsed_ms = round(((timeit.default_timer() - start_time)*1000),3)
    print (" runtime ~= {0} ms".format(elapsed_ms))
    print (" source file --> '{0}'".format(source_name))
    print (" output via --> '{0}'".format(output_module))
    print (" modules used in '{0}':".format(stem))
    wrapped = twrap.wrap(l_to_str(src_modules(stem)), 75)
    print ("  "+"\n  ".join(wrapped))
    print (banner)

    return ""


if __name__ == "__main__":
    # When run as a script, report on this very file.
    src_info(os.path.basename(__file__))


## how to use in X file:
#
# import print_src_info
# import os
#
# < ... your code ... >
#
# if __name__ == "__main__":
#     print_src_info.src_info(os.path.basename(__file__))


## example output:
#
# ################################################################################
#  runtime ~= 0.049 ms
#  source file --> 'print_src_info.py'
#  output via --> '__main__'
#  modules used in 'print_src_info':
#   'importlib':=implib  'inspect':=inspect  'os':=os  'textwrap':=twrap
#   'timeit':=timeit
# ################################################################################
import timeit
import os
import inspect, importlib as implib
import textwrap as twrap

def src_modules(filename):
    assert (len(filename)>1)

    mod = implib.import_module(filename.split(".")[0])
    ml_alias = []
    ml_actual = []
    ml_together = []
    ml_final = []
    for i in inspect.getmembers(mod, inspect.ismodule):
        ml_alias.append(i[0])
        ml_actual.append((str(i[1]).split(" ")[1]))
        ml_together = zip(ml_actual, ml_alias)
    for t in ml_together:
        (a,b) = t
        ml_final.append(a+":="+b)

    return ml_final

def l_to_str(itr):
    assert(len(itr)>0)

    itr.sort()
    r_str = ""
    for i in itr:
        r_str += i+"  "
    return r_str

def src_info(filename, start_time=timeit.default_timer()):
    assert (len(filename)>1)

    filename_in = filename
    filename = filename_in.split(".")[0]

    if __name__ == filename:
        output_module = filename
import timeit
import os
import inspect, importlib as implib
import textwrap as twrap

def src_modules(filename):
    """Import the module named by *filename* and describe the modules it holds.

    Args:
        filename: Module file name; everything from the first "." onward is
            dropped to obtain the module name, which is then imported.

    Returns:
        A list of ``"<quoted real name>:=<alias>"`` strings, one for each
        module-valued attribute of the imported module, in the order given
        by ``inspect.getmembers``.
    """
    assert (len(filename)>1)

    module_name = filename.split(".")[0]
    mod = implib.import_module(module_name)
    # str(module) looks like "<module 'os' from ...>"; the second
    # space-separated field is the quoted real module name.
    return [
        str(member).split(" ")[1] + ":=" + alias
        for alias, member in inspect.getmembers(mod, inspect.ismodule)
    ]

def l_to_str(itr):
    """Sort *itr* in place and join its strings, each followed by two spaces.

    Args:
        itr: Non-empty list of strings. It is sorted in place, so the
            caller's list is modified.

    Returns:
        The sorted items concatenated; every item, including the last one,
        is followed by two spaces.
    """
    assert(len(itr)>0)

    itr.sort()
    return "".join(entry + "  " for entry in itr)

def src_info(filename, start_time=timeit.default_timer()):
    """Print a boxed report of the modules used by the module *filename*.

    Args:
        filename: Module file name (for example ``"print_src_info.py"``).
        start_time: Timer reading the reported runtime is measured from. The
            default is evaluated once, when this function is defined.

    Returns:
        An empty string.
    """
    assert (len(filename)>1)

    source_name = filename
    stem = source_name.split(".")[0]

    # The original compared __name__ with the stem and picked one of the two,
    # which always yields this module's __name__.
    output_module = __name__

    banner = 80 * "#"
    print ("\n" + banner)
    elapsed_ms = round(((timeit.default_timer() - start_time)*1000),3)
    print (" runtime ~= {0} ms".format(elapsed_ms))
    print (" source file --> '{0}'".format(source_name))
    print (" output via --> '{0}'".format(output_module))
    print (" modules used in '{0}':".format(stem))
    wrapped = twrap.wrap(l_to_str(src_modules(stem)), 75)
    print ("  "+"\n  ".join(wrapped))
    print (banner)

    return ""


if __name__ == "__main__":
    # When run as a script, report on this very file.
    src_info(os.path.basename(__file__))


## how to use in X file:
#
# import print_src_info
# import os
#
# < ... your code ... >
#
# if __name__ == "__main__":
#     print_src_info.src_info(os.path.basename(__file__))


## example output:
#
# ################################################################################
#  runtime ~= 0.049 ms
#  source file --> 'print_src_info.py'
#  output via --> '__main__'
#  modules used in 'print_src_info':
#   'importlib':=implib  'inspect':=inspect  'os':=os  'textwrap':=twrap
#   'timeit':=timeit
# ################################################################################
def find_modules(code):
    """Return the module names mentioned by import statements in *code*.

    The source is scanned line by line and never executed or parsed, so a
    line inside a string literal that looks like an import is also reported.

    Args:
        code: Python source text.

    Returns:
        Module names in order of appearance, duplicates kept. For
        ``import a.b as c, d`` the entries are ``"a.b"`` and ``"d"``; for
        ``from m import x`` the entry is ``"m"``.
    """
    modules = []
    for raw_line in code.splitlines():
        # Ignore trailing comments and indentation, so imports nested in
        # functions or classes are recognised too.
        line = raw_line.partition("#")[0].strip()
        if line.startswith("import "):
            # One clause per comma; the first word of each clause is the
            # module name and any "as alias" suffix is dropped.
            for clause in line[len("import "):].split(","):
                words = clause.split()
                if words:
                    modules.append(words[0])
        elif line.startswith("from ") and " import " in line:
            modules.append(line[len("from "):line.find(" import ")].strip())
        # Every other line is skipped. The previous version split any line
        # containing ", " as if it were an import, and printed the rest.
    return modules

# Sample input: plain, from-style, aliased and comma-separated imports.
code = """
import foo
import bar
from baz import eggs
import mymodule as test
import hello, there, stack
"""
print(find_modules(code))
['foo', 'bar', 'baz', 'mymodule', 'hello', 'there', 'stack']
import re
import os


def get_imported_modules(folder):
    """Return the top-level modules imported by the ``.py`` files in *folder*.

    Only files directly inside *folder* are read (no recursion). The files
    are scanned with a regular expression and never executed. Relative
    imports (``from . import x``, ``from .pkg import y``) are skipped.

    Args:
        folder: Directory containing the Python files to scan.

    Returns:
        Unique top-level package names in order of first appearance; files
        are processed in sorted name order so the result is deterministic.
    """
    # A statement must begin its line (after indentation). Either
    #   import a, b.c as d      -> group "names" holds the clause list
    #   from a.b import ...     -> group "module" holds the dotted name
    statement = re.compile(
        r"^[ \t]*(?:import[ \t]+(?P<names>[^#;\n]+)"
        r"|from[ \t]+(?P<module>\w[\w.]*)[ \t]+import\b)",
        re.MULTILINE
    )

    imports = []
    seen = set()  # O(1) duplicate check; the list keeps the order
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".py"):
            continue
        with open(os.path.join(folder, filename), mode="r") as f:
            source = f.read()
        for match in statement.finditer(source):
            if match.group("module"):
                dotted_names = [match.group("module")]
            else:
                # First word of each comma-separated clause; drops "as x".
                dotted_names = [
                    clause.split()[0]
                    for clause in match.group("names").split(",")
                    if clause.strip()
                ]
            for dotted in dotted_names:
                top_level = dotted.partition(".")[0]
                if re.match(r"\w+$", top_level) and top_level not in seen:
                    seen.add(top_level)
                    imports.append(top_level)

    return imports
import ast


# Top-level package names collected by the two callbacks below.
modules = set()


def visit_Import(node):
    """Record the top-level package of every name in an ``import`` node."""
    modules.update(alias.name.split(".")[0] for alias in node.names)


def visit_ImportFrom(node):
    """Record the top-level package of an absolute ``from ... import`` node."""
    # A missing module means "from . import ...", and a level above zero
    # means "from .submodule import ...": both are relative, so skip them.
    if node.module is None or node.level != 0:
        return
    modules.add(node.module.split(".")[0])


# A plain NodeVisitor whose handlers for the two import node types are
# replaced on the instance with the functions above.
node_iter = ast.NodeVisitor()
node_iter.visit_Import = visit_Import
node_iter.visit_ImportFrom = visit_ImportFrom
# foo.py
import sys, os
import foo1
from foo2 import bar
from foo3 import bar as che
import foo4 as boo
import foo5.zoo
from foo6 import *
from . import foo7, foo8
from .foo12 import foo13
from foo9 import foo10, foo11

def do():
    # Function-local imports, included so the sample also covers imports
    # that are not at module level.
    import bar1
    from bar2 import foo
    from bar3 import che as baz
# Parse foo.py without executing it, walk the tree, then show the result.
with open("foo.py") as f:
    node_iter.visit(ast.parse(f.read()))
print(modules)
set(['bar1', 'bar3', 'bar2', 'sys', 'foo9', 'foo4', 'foo5', 'foo6', 'os', 'foo1', 'foo2', 'foo3'])