Python Apache Spark - ImportError: 没有名为 _winreg 的模块
一个此前运行良好的脚本大约在一周前停止工作了。当我编译一个 lambda 函数、然后用它创建 RDD 时，问题就出现了。考虑以下代码：
class RDDUtils(object):
    """Helper methods for building RDDs of field-name -> value dicts
    from delimited text files."""

    @staticmethod
    def map_builder(*fields):
        """
        Create a function that maps a split CSV row (a list) to a dict
        keyed by the given field names.

        Fix: the original built the lambda via compile()/FunctionType
        (with FunctionType never imported), producing a code object that
        cloudpickle failed to serialize on this setup — the traceback
        shows pickling walking module globals and tripping six's lazy
        _winreg import.  A plain closure pickles cleanly and behaves
        identically.

        :param fields: The names of the fields, in column order
        :return: A function mapping a row list to a dict
        """
        # `fields` is captured by the closure; enumerate pairs each
        # field name with its column index.
        return lambda x: {name: x[i] for i, name in enumerate(fields)}

    @staticmethod
    def rdd_creator(context, fields, source_file, delim='\t'):
        """
        Method which creates an RDD of dicts from a delimited text file.

        :param context: spark context
        :param fields: fields / columns in our csv file
        :param source_file: location of csv file
        :param delim: field delimiter (default: tab)
        :return: RDD
        """
        build = RDDUtils.map_builder(*fields)
        return context.textFile(source_file).map(lambda x: x.split(delim)).map(build)
# Driver script: build an RDD from a tab-delimited CSV and print its first row.
rdd = RDDUtils()
# NOTE(review): `context` is not defined anywhere in this snippet —
# presumably an alias for pyspark.SparkContext; confirm the actual import.
sc = context('demo1', 'local')
fields = ['username', 'full_name', 'src_id']
source_file = '/home/aaron/dim_operator.csv'
# rdd_creator is a @staticmethod, so the instance call does not pass self.
create_rdd = rdd.rdd_creator(sc, fields,source_file)
# Python 2 print statement: triggers the job and shows the first parsed row.
print create_rdd.first()
class RDDUtils(object):
    """eval()-based variant: map_builder returns a dict-literal source
    string instead of a compiled function."""

    @staticmethod
    def map_builder(*fields):
        """
        Build the source of a dict literal, e.g. '{"a" : x[0],"b" : x[1]}',
        for later eval() with a split row bound to the name ``x``.

        Fixes over the original: both methods were missing @staticmethod
        (an instance call would mis-bind the first argument), and
        rdd_creator referenced map_builder unqualified, which raises
        NameError at runtime (class attributes are not in method scope).

        :param fields: field names, in column order
        :return: the dict-literal source string
        """
        return "{" + ','.join('"{}" : x[{}]'.format(c, i)
                              for i, c in enumerate(fields, 0)) + "}"

    @staticmethod
    def rdd_creator(context, fields, source_file, delim='\t'):
        """
        Creates an RDD of dicts from a delimited text file.

        NOTE(review): eval() on a generated string is acceptable only
        because ``fields`` is programmer-controlled; never feed it
        untrusted input.

        :param context: spark context
        :param fields: fields / columns in the csv file
        :param source_file: location of the csv file
        :param delim: field delimiter (default: tab)
        :return: RDD
        """
        build = RDDUtils.map_builder(*fields)
        return context.textFile(source_file).map(lambda x: x.split(delim)).map(lambda x: eval(build))
# Entry point: exercises the eval()-based RDDUtils variant above.
if __name__ == "__main__":
    rdd = RDDUtils()
    # NOTE(review): `context` is undefined in this snippet — likely
    # pyspark.SparkContext; confirm the missing import.
    sc = context('demo1', 'local')
    fields = ['username', 'full_name', 'src_id']
    source_file = '/home/aaron/dim_operator.csv'
    # NOTE(review): rdd_creator is not a @staticmethod in this variant, so an
    # instance call binds `rdd` to the `context` parameter — verify intent.
    create_rdd = rdd.rdd_creator(sc, fields,source_file)
    # Python 2 print statement: triggers the job and shows the first row.
    print create_rdd.first()
是什么原因导致这个脚本突然停止工作？
在Ubuntu 14.04.3上运行这个
我通过显式调用lambda,然后在没有lambda的情况下动态创建的字符串周围包装eval(),解决了这个问题
更新代码如下:
class RDDUtils(object):
    """Helpers that build an RDD of dicts from a delimited text file."""

    @staticmethod
    def map_builder(*fields):
        """
        Creates a compiled lambda function for use in spark keyBy using
        the specified field names.

        Fix: ``FunctionType`` was used without being imported anywhere
        visible in this file; import it locally so the method is
        self-contained.

        :param fields: The name of the fields to create the function with
        :return: A compiled python function
        """
        from types import FunctionType  # fix: name was previously unresolved

        body = ',\n'.join('"{}" : x[{}]'.format(c, i)
                          for i, c in enumerate(fields, 0))
        # Compiling in 'eval' mode yields a code object for the lambda
        # expression; wrapping it in FunctionType and calling it with no
        # arguments evaluates the expression and returns the lambda.
        func = FunctionType(compile("lambda x: {" + body + "}", "<string>", "eval"), {})
        return func()

    @staticmethod
    def rdd_creator(context, fields, source_file, delim='\t'):
        """
        Method which creates an RDD.

        :param context: spark context
        :param fields: fields / columns in our csv file
        :param source_file: location of csv file
        :param delim: field delimiter (default: tab)
        :return: RDD
        """
        build = RDDUtils.map_builder(*fields)
        return context.textFile(source_file).map(lambda x: x.split(delim)).map(build)
# Driver script: build an RDD from a tab-delimited CSV and print its first row.
rdd = RDDUtils()
# NOTE(review): `context` is not defined anywhere in this snippet —
# presumably an alias for pyspark.SparkContext; confirm the actual import.
sc = context('demo1', 'local')
fields = ['username', 'full_name', 'src_id']
source_file = '/home/aaron/dim_operator.csv'
# rdd_creator is a @staticmethod, so the instance call does not pass self.
create_rdd = rdd.rdd_creator(sc, fields,source_file)
# Python 2 print statement: triggers the job and shows the first parsed row.
print create_rdd.first()
class RDDUtils(object):
    """Build RDDs of field-name -> value dicts from delimited text."""

    @staticmethod
    def map_builder(*fields):
        """
        Return the source text of a dict literal such as
        '{"a" : x[0],"b" : x[1]}', to be eval()'d later with a split row
        bound to the name ``x``.

        Fixes over the original: @staticmethod was missing on both
        methods (an instance call would have mis-bound the first
        argument), and rdd_creator referenced map_builder unqualified,
        raising NameError because class attributes are not in method
        scope.
        """
        parts = ('"{}" : x[{}]'.format(name, idx)
                 for idx, name in enumerate(fields, 0))
        return "{" + ','.join(parts) + "}"

    @staticmethod
    def rdd_creator(context, fields, source_file, delim='\t'):
        """
        Creates an RDD of dicts from a delimited text file.

        :param context: spark context
        :param fields: fields / columns in the csv file
        :param source_file: location of the csv file
        :param delim: field delimiter (default: tab)
        :return: RDD
        """
        source = RDDUtils.map_builder(*fields)
        lines = context.textFile(source_file)
        # eval() is acceptable here only because `fields` is programmer-
        # supplied; never pass untrusted names through map_builder.
        return lines.map(lambda x: x.split(delim)).map(lambda x: eval(source))
# Entry point: exercises the eval()-based RDDUtils variant above.
if __name__ == "__main__":
    rdd = RDDUtils()
    # NOTE(review): `context` is undefined in this snippet — likely
    # pyspark.SparkContext; confirm the missing import.
    sc = context('demo1', 'local')
    fields = ['username', 'full_name', 'src_id']
    source_file = '/home/aaron/dim_operator.csv'
    # NOTE(review): rdd_creator is not a @staticmethod in this variant, so an
    # instance call binds `rdd` to the `context` parameter — verify intent.
    create_rdd = rdd.rdd_creator(sc, fields,source_file)
    # Python 2 print statement: triggers the job and shows the first row.
    print create_rdd.first()
预期结果如下:
{'username':u'dev','src_id':u'1','full_name':u'Main dev user'}
编辑:下面是完整的回溯
Traceback (most recent call last):
File "/home/aaron/apps/pycharm-3.0.2/helpers/pydev/pydevd.py", line 1532, in <module>
debugger.run(setup['file'], None, None)
File "/home/aaron/apps/pycharm-3.0.2/helpers/pydev/pydevd.py", line 1143, in run
pydev_imports.execfile(file, globals, locals) #execute the script
File "/home/aaron/PycharmProjects/fetl/dim_operator.py", line 127, in <module>
print create_rdd.first()
File "/home/aaron/apps/spark/python/pyspark/rdd.py", line 1242, in first
rs = self.take(1)
File "/home/aaron/apps/spark/python/pyspark/rdd.py", line 1194, in take
totalParts = self._jrdd.partitions().size()
File "/home/aaron/apps/spark/python/pyspark/rdd.py", line 2288, in _jrdd
pickled_cmd, bvars, env, includes = _prepare_for_python_RDD(self.ctx, command, self)
File "/home/aaron/apps/spark/python/pyspark/rdd.py", line 2206, in _prepare_for_python_RDD
pickled_command = ser.dumps(command)
File "/home/aaron/apps/spark/python/pyspark/serializers.py", line 411, in dumps
return cloudpickle.dumps(obj, 2)
File "/home/aaron/apps/spark/python/pyspark/cloudpickle.py", line 816, in dumps
cp.dump(obj)
File "/home/aaron/apps/spark/python/pyspark/cloudpickle.py", line 133, in dump
return pickle.Pickler.dump(self, obj)
File "/usr/lib/python2.7/pickle.py", line 224, in dump
self.save(obj)
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python2.7/pickle.py", line 562, in save_tuple
save(element)
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/home/aaron/apps/spark/python/pyspark/cloudpickle.py", line 254, in save_function
self.save_function_tuple(obj, [themodule])
File "/home/aaron/apps/spark/python/pyspark/cloudpickle.py", line 304, in save_function_tuple
save((code, closure, base_globals))
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python2.7/pickle.py", line 548, in save_tuple
save(element)
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python2.7/pickle.py", line 600, in save_list
self._batch_appends(iter(obj))
File "/usr/lib/python2.7/pickle.py", line 633, in _batch_appends
save(x)
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/home/aaron/apps/spark/python/pyspark/cloudpickle.py", line 254, in save_function
self.save_function_tuple(obj, [themodule])
File "/home/aaron/apps/spark/python/pyspark/cloudpickle.py", line 304, in save_function_tuple
save((code, closure, base_globals))
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python2.7/pickle.py", line 548, in save_tuple
save(element)
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python2.7/pickle.py", line 600, in save_list
self._batch_appends(iter(obj))
File "/usr/lib/python2.7/pickle.py", line 636, in _batch_appends
save(tmp[0])
File "/usr/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/home/aaron/apps/spark/python/pyspark/cloudpickle.py", line 209, in save_function
modname = pickle.whichmodule(obj, name)
File "/usr/lib/python2.7/pickle.py", line 817, in whichmodule
if name != '__main__' and getattr(module, funcname, None) is func:
File "/usr/lib/python2.7/dist-packages/six.py", line 116, in __getattr__
_module = self._resolve()
File "/usr/lib/python2.7/dist-packages/six.py", line 105, in _resolve
return _import_module(self.mod)
File "/usr/lib/python2.7/dist-packages/six.py", line 76, in _import_module
__import__(name)
ImportError: No module named _winreg
回溯(最近一次呼叫最后一次):
文件“/home/aron/apps/pycharm-3.0.2/helpers/pydev/pydevd.py”,第1532行,在
运行(安装程序['file'],无,无)
文件“/home/aron/apps/pycharm-3.0.2/helpers/pydev/pydevd.py”,第1143行,运行中
pydev_imports.execfile(文件、全局、局部)#执行脚本
文件“/home/aron/PycharmProjects/fetl/dim_operator.py”,第127行,在
打印创建rdd.first()
文件“/home/aron/apps/spark/python/pyspark/rdd.py”,第1242行,第一行
rs=自取(1)
文件“/home/aron/apps/spark/python/pyspark/rdd.py”,第1194行,在take中
totalParts=self.\u jrdd.partitions().size()
文件“/home/aron/apps/spark/python/pyspark/rdd.py”,第2288行,在jrdd中
pickled_cmd,bvars,env,includes=_prepare_for_python_RDD(self.ctx,command,self)
文件“/home/aron/apps/spark/python/pyspark/rdd.py”,第2206行,在“为python\rdd做准备”中
pickled_command=ser.dumps(命令)
文件“/home/aron/apps/spark/python/pyspark/serializers.py”,第411行,转储
返回cloudpickle.dumps(obj,2)
文件“/home/aron/apps/spark/python/pyspark/cloudpickle.py”,第816行,转储
cp.dump(obj)
文件“/home/aron/apps/spark/python/pyspark/cloudpickle.py”,第133行,在转储文件中
返回pickle.Pickler.dump(self,obj)
文件“/usr/lib/python2.7/pickle.py”,第224行,在转储中
自我保存(obj)
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
文件“/usr/lib/python2.7/pickle.py”,第562行,在save_tuple中
保存(元素)
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
文件“/home/aron/apps/spark/python/pyspark/cloudpickle.py”,第254行,在save_函数中
self.save\u函数\u元组(obj,[themodule])
文件“/home/aaron/apps/spark/python/pyspark/cloudpickle.py”,第304行,在save_function_元组中
保存((代码、闭包、基本\全局))
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
文件“/usr/lib/python2.7/pickle.py”,第548行,在save_tuple中
保存(元素)
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
保存列表中第600行的文件“/usr/lib/python2.7/pickle.py”
自批附录(iter(obj))
文件“/usr/lib/python2.7/pickle.py”,第633行,在批处理附录中
保存(x)
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
文件“/home/aron/apps/spark/python/pyspark/cloudpickle.py”,第254行,在save_函数中
self.save\u函数\u元组(obj,[themodule])
文件“/home/aaron/apps/spark/python/pyspark/cloudpickle.py”,第304行,在save_function_元组中
保存((代码、闭包、基本\全局))
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
文件“/usr/lib/python2.7/pickle.py”,第548行,在save_tuple中
保存(元素)
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
保存列表中第600行的文件“/usr/lib/python2.7/pickle.py”
自批附录(iter(obj))
文件“/usr/lib/python2.7/pickle.py”,第636行,在批处理附录中
保存(tmp[0])
文件“/usr/lib/python2.7/pickle.py”,第286行,保存
f(self,obj)#用显式self调用未绑定方法
文件“/home/aron/apps/spark/python/pyspark/cloudpickle.py”,第209行,在save_函数中
modname=pickle.whichmodule(对象,名称)
文件“/usr/lib/python2.7/pickle.py”,第817行,其中包含模块
如果名称!='__main_uu'和getattr(模块,funcname,无)是func:
文件“/usr/lib/python2.7/dist packages/six.py”,第116行,在__
_模块=自我解析()
文件“/usr/lib/python2.7/dist-packages/six.py”,第105行,在
返回导入模块(self.mod)
文件“/usr/lib/python2.7/dist packages/six.py”,第76行,在导入模块中
__导入(名称)
ImportError:没有名为_winreg的模块
此错误是由您的操作系统引起的。_winreg 无法在 Linux（Ubuntu）上使用，它是一个仅限 Windows 的模块。
可用性:Windows
版本2.0中的新功能
这些函数向Python公开Windows注册表API。不是使用整数作为注册表句柄,而是使用句柄对象来确保句柄正确关闭,即使程序员忽略显式关闭它们
此模块向Windows注册表公开一个非常低级的接口;预计未来将创建一个新的winreg模块,为注册表API提供更高级别的接口
但是代码本身并没有直接导入 _winreg——从回溯末尾几帧可以看到，它是在 cloudpickle 序列化函数时，由 six 库的延迟导入机制（`six._resolve` → `__import__('_winreg')`）触发的。