如何解决 TypeError: unhashable type: 'numpy.ndarray'(不可哈希类型)?
如何解决 TypeError: unhashable type: 'numpy.ndarray'(不可哈希类型)?,numpy,apache-spark,scipy,pyspark,numpy-ndarray,Numpy,Apache Spark,Scipy,Pyspark,Numpy Ndarray,打印(嵌入.take(1))返回以下内容: [(u'text', array([-0.31921682, -0.20686883, 0.01824462, 0.13350081, 0.01187224, 0.22116834, -0.287487 , -0.11509234, 0.01763669, 0.06377559, 0.2989474 , 0.41020724, -0.22037283, 0.45994595, -0.12425458,
打印(嵌入.take(1))
返回以下内容:
[(u'text', array([-0.31921682, -0.20686883, 0.01824462, 0.13350081, 0.01187224,
0.22116834, -0.287487 , -0.11509234, 0.01763669, 0.06377559,
0.2989474 , 0.41020724, -0.22037283, 0.45994595, -0.12425458,
-0.20881261, 0.06872807, 0.53261876, 0.45528108, 0.3245842 ,
0.09092806, 0.17639753, -0.47674257, -0.00098801, -0.12842408,
-0.2413709 , 0.38194713, -0.11250313, -0.15904745, 0.16058864,
-0.33080024, 0.37156585, 0.01329294, 0.36711624, 0.1973844 ,
-0.18771271, 0.08853641, 0.23573542, 0.09280699, -0.07244137,
0.09726012, -0.28807876, 0.01709639, 0.375758 , 0.4611828 ,
0.02307661, 0.3119973 , -0.5212216 , 0.230173 , -0.09128311,
0.2713826 , -0.2568359 , -0.11232495, 0.00200466, 0.09583885,
-0.25420523, -0.10021619, -0.19341935, -0.22922793, 0.0212901 ,
0.20808727, 0.32417038, 0.03864996, 0.2969149 , -0.03171158,
0.45413095, 0.6309765 , 0.28096622, -0.45515797, 0.28787974,
-0.5809179 , -0.09877653, 0.24814974, -0.35588014, 0.42792156,
0.21451631, -0.1350529 , 0.37952444, -0.00165558, 0.384076 ,
0.1413526 , 0.08866125, -0.10355992, 0.25019792, -0.00393839,
0.5695221 , -0.12449711, 0.00210058, 0.07765691, 0.3281926 ,
-0.08014766, 0.20689923, -0.22970992, 0.07591247, -0.15799475,
-0.22765721, -0.1927638 , -0.15355097, 0.09073654, -0.04624737,
-0.01643844, -0.8464762 , 0.08931787, 0.5332598 , 0.2911471 ,
0.3791839 , -0.303577 , -0.22905344, -0.21888404, -0.1810556 ,
-0.019493 , 0.1367392 , 0.2187451 , 0.04935849, 0.14806354,
-0.00551599, -0.05861316, 0.26915333, -0.3377117 , 0.00114926,
-0.08245742, 0.4929164 , 0.06329145, 0.88905925, 0.25238925,
-0.33230686, 0.19560733, 0.4172665 , 0.02552557, 0.26976195,
0.26783204, -0.25621846, 0.1972084 , -0.3187281 , 0.04309576,
0.5668932 , -0.20068711, -0.55052537, 0.38765076, 0.4864744 ,
0.1130944 , -0.01685749, 0.2522309 , 0.35446006, -0.09084648,
0.24245648, 0.06625048, 0.32369784, -0.06834482, -0.43762162,
0.5748935 , -0.3647702 , 0.35806394, -0.1582715 , 0.0772159 ,
-0.16100545, 0.4267 , -0.1307025 , -0.03227446, 0.10494301,
-0.05289922, 0.7097728 , -0.17166416, -0.054304 , 0.12740278,
-0.14317441, -0.26385677, -0.22849232, 0.10305541, -0.04086439,
-0.46178675, -0.09216189, -0.76668286, -0.09225449, -0.2168125 ,
0.12703866, 0.05073327, 0.04017496, -0.24126993, 0.06369572,
-0.09168304, -0.40669888, 0.28603286, -0.04988515, 0.05513516,
0.7806739 , -0.02534869, 0.00973589, 0.33947662, 0.24699458,
0.42974108, -0.19158548, 0.07973159, 0.30443648, 0.01040802,
0.1698588 , 0.06705329, 0.03138978, -0.5304623 , 0.1360791 ,
0.24522378, 0.12584817, -0.14219321, 0.19166584, 0.2226152 ,
0.07703363, 0.23977087, 0.11122001, 0.1861035 , 0.37293455,
0.33047304, -0.00121733, 0.5897423 , -0.06080131, -0.05202375,
0.10317306, -0.04354465, -0.31621787, 0.35597408, 0.3476911 ,
0.19293919, -0.02774811, -0.2739977 , -0.08927495, 0.04805851,
-0.17200205, -0.7588404 , -0.0615377 , 0.22575249, -0.09135661,
0.05711236, 0.04755763, -0.22369057, -0.17867683, -0.5001432 ,
-0.50440204, 0.09518195, 0.21083611, 0.1761034 , -0.12429572,
-0.39988747, 0.61002195, 0.4314368 , 0.02579845, -0.28676844,
-0.03552085, 0.21395397, 0.075849 , -0.21982886, -0.39733076,
0.17018917, 0.0251913 , 0.14543247, 0.2262631 , -0.05759874,
-0.18585657, 0.3211592 , -0.272339 , -0.22289205, -0.31557533,
0.27803165, 0.22905917, -0.15953094, -0.10018265, -0.32501385,
0.3319722 , -0.5288052 , -0.18201298, 0.08485821, -0.3703766 ,
0.21344219, -0.13149662, 0.21560058, 0.08420809, 0.15680231,
0.22140822, 0.24218608, 0.40488818, -0.00483301, -0.34549713,
0.4376315 , 0.53999686, 0.5157788 , -0.14618067, 0.15518756,
-0.29715803, 0.3193897 , 0.08780982, -0.08137056, -0.01085411,
0.17661236, 0.03506121, -0.030442 , -0.04898388, 0.36918342,
-0.07420906, 0.24415983, -0.02784878, 0.11730439, 0.24948044],
dtype=float32))]
# Python 2 only: both lambdas rely on tuple-parameter unpacking.
# Splits every (url, document) pair into lowercase words, then pairs each word
# with whatever Loader(modelpath).map(word) returns for it.
inputInvertedIndexEmbedding = inputRawEmbed.flatMap(lambda (url, document): [(url, word) for word in document.lower().split()]).map(lambda (url, word): (word, Loader(modelpath).map(word)))
和打印(键入(嵌入))
打印:
当我执行以下操作时,会抛出该异常:
[(u'text', array([-0.31921682, -0.20686883, 0.01824462, 0.13350081, 0.01187224,
0.22116834, -0.287487 , -0.11509234, 0.01763669, 0.06377559,
0.2989474 , 0.41020724, -0.22037283, 0.45994595, -0.12425458,
-0.20881261, 0.06872807, 0.53261876, 0.45528108, 0.3245842 ,
0.09092806, 0.17639753, -0.47674257, -0.00098801, -0.12842408,
-0.2413709 , 0.38194713, -0.11250313, -0.15904745, 0.16058864,
-0.33080024, 0.37156585, 0.01329294, 0.36711624, 0.1973844 ,
-0.18771271, 0.08853641, 0.23573542, 0.09280699, -0.07244137,
0.09726012, -0.28807876, 0.01709639, 0.375758 , 0.4611828 ,
0.02307661, 0.3119973 , -0.5212216 , 0.230173 , -0.09128311,
0.2713826 , -0.2568359 , -0.11232495, 0.00200466, 0.09583885,
-0.25420523, -0.10021619, -0.19341935, -0.22922793, 0.0212901 ,
0.20808727, 0.32417038, 0.03864996, 0.2969149 , -0.03171158,
0.45413095, 0.6309765 , 0.28096622, -0.45515797, 0.28787974,
-0.5809179 , -0.09877653, 0.24814974, -0.35588014, 0.42792156,
0.21451631, -0.1350529 , 0.37952444, -0.00165558, 0.384076 ,
0.1413526 , 0.08866125, -0.10355992, 0.25019792, -0.00393839,
0.5695221 , -0.12449711, 0.00210058, 0.07765691, 0.3281926 ,
-0.08014766, 0.20689923, -0.22970992, 0.07591247, -0.15799475,
-0.22765721, -0.1927638 , -0.15355097, 0.09073654, -0.04624737,
-0.01643844, -0.8464762 , 0.08931787, 0.5332598 , 0.2911471 ,
0.3791839 , -0.303577 , -0.22905344, -0.21888404, -0.1810556 ,
-0.019493 , 0.1367392 , 0.2187451 , 0.04935849, 0.14806354,
-0.00551599, -0.05861316, 0.26915333, -0.3377117 , 0.00114926,
-0.08245742, 0.4929164 , 0.06329145, 0.88905925, 0.25238925,
-0.33230686, 0.19560733, 0.4172665 , 0.02552557, 0.26976195,
0.26783204, -0.25621846, 0.1972084 , -0.3187281 , 0.04309576,
0.5668932 , -0.20068711, -0.55052537, 0.38765076, 0.4864744 ,
0.1130944 , -0.01685749, 0.2522309 , 0.35446006, -0.09084648,
0.24245648, 0.06625048, 0.32369784, -0.06834482, -0.43762162,
0.5748935 , -0.3647702 , 0.35806394, -0.1582715 , 0.0772159 ,
-0.16100545, 0.4267 , -0.1307025 , -0.03227446, 0.10494301,
-0.05289922, 0.7097728 , -0.17166416, -0.054304 , 0.12740278,
-0.14317441, -0.26385677, -0.22849232, 0.10305541, -0.04086439,
-0.46178675, -0.09216189, -0.76668286, -0.09225449, -0.2168125 ,
0.12703866, 0.05073327, 0.04017496, -0.24126993, 0.06369572,
-0.09168304, -0.40669888, 0.28603286, -0.04988515, 0.05513516,
0.7806739 , -0.02534869, 0.00973589, 0.33947662, 0.24699458,
0.42974108, -0.19158548, 0.07973159, 0.30443648, 0.01040802,
0.1698588 , 0.06705329, 0.03138978, -0.5304623 , 0.1360791 ,
0.24522378, 0.12584817, -0.14219321, 0.19166584, 0.2226152 ,
0.07703363, 0.23977087, 0.11122001, 0.1861035 , 0.37293455,
0.33047304, -0.00121733, 0.5897423 , -0.06080131, -0.05202375,
0.10317306, -0.04354465, -0.31621787, 0.35597408, 0.3476911 ,
0.19293919, -0.02774811, -0.2739977 , -0.08927495, 0.04805851,
-0.17200205, -0.7588404 , -0.0615377 , 0.22575249, -0.09135661,
0.05711236, 0.04755763, -0.22369057, -0.17867683, -0.5001432 ,
-0.50440204, 0.09518195, 0.21083611, 0.1761034 , -0.12429572,
-0.39988747, 0.61002195, 0.4314368 , 0.02579845, -0.28676844,
-0.03552085, 0.21395397, 0.075849 , -0.21982886, -0.39733076,
0.17018917, 0.0251913 , 0.14543247, 0.2262631 , -0.05759874,
-0.18585657, 0.3211592 , -0.272339 , -0.22289205, -0.31557533,
0.27803165, 0.22905917, -0.15953094, -0.10018265, -0.32501385,
0.3319722 , -0.5288052 , -0.18201298, 0.08485821, -0.3703766 ,
0.21344219, -0.13149662, 0.21560058, 0.08420809, 0.15680231,
0.22140822, 0.24218608, 0.40488818, -0.00483301, -0.34549713,
0.4376315 , 0.53999686, 0.5157788 , -0.14618067, 0.15518756,
-0.29715803, 0.3193897 , 0.08780982, -0.08137056, -0.01085411,
0.17661236, 0.03506121, -0.030442 , -0.04898388, 0.36918342,
-0.07420906, 0.24415983, -0.02784878, 0.11730439, 0.24948044],
dtype=float32))]
# Python 2 only: both lambdas rely on tuple-parameter unpacking.
# Splits every (url, document) pair into lowercase words, then pairs each word
# with whatever Loader(modelpath).map(word) returns for it.
inputInvertedIndexEmbedding = inputRawEmbed.flatMap(lambda (url, document): [(url, word) for word in document.lower().split()]).map(lambda (url, word): (word, Loader(modelpath).map(word)))
Traceback (most recent call last):
  File "/project/6008168/tamouze/testSparkCedar.py", line 390, in <module>
    print(embed1.distinct().count())
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 1056, in count
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 1047, in sum
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 921, in fold
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 824, in collect
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py", line 1160, in __call__
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/py4j-0.10.6-src.zip/py4j/protocol.py", line 320, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 1.0 failed 1 times, most recent failure: Lost task 3.0 in stage 1.0 (TID 4, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/worker.py", line 229, in main
    process()
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/worker.py", line 224, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 2438, in pipeline_func
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 2438, in pipeline_func
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 362, in func
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/rdd.py", line 1857, in combineLocally
  File "/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/spark/2.3.0/python/lib/pyspark.zip/pyspark/shuffle.py", line 238, in mergeValues
    d[k] = comb(d[k], v) if k in d else creator(v)
TypeError: unhashable type: 'numpy.ndarray'
我得到的结果如下:
[(u'text', array([-0.31921682, -0.20686883, 0.01824462, 0.13350081, 0.01187224,
0.22116834, -0.287487 , -0.11509234, 0.01763669, 0.06377559,
0.2989474 , 0.41020724, -0.22037283, 0.45994595, -0.12425458,
-0.20881261, 0.06872807, 0.53261876, 0.45528108, 0.3245842 ,
0.09092806, 0.17639753, -0.47674257, -0.00098801, -0.12842408,
-0.2413709 , 0.38194713, -0.11250313, -0.15904745, 0.16058864,
-0.33080024, 0.37156585, 0.01329294, 0.36711624, 0.1973844 ,
-0.18771271, 0.08853641, 0.23573542, 0.09280699, -0.07244137,
0.09726012, -0.28807876, 0.01709639, 0.375758 , 0.4611828 ,
0.02307661, 0.3119973 , -0.5212216 , 0.230173 , -0.09128311,
0.2713826 , -0.2568359 , -0.11232495, 0.00200466, 0.09583885,
-0.25420523, -0.10021619, -0.19341935, -0.22922793, 0.0212901 ,
0.20808727, 0.32417038, 0.03864996, 0.2969149 , -0.03171158,
0.45413095, 0.6309765 , 0.28096622, -0.45515797, 0.28787974,
-0.5809179 , -0.09877653, 0.24814974, -0.35588014, 0.42792156,
0.21451631, -0.1350529 , 0.37952444, -0.00165558, 0.384076 ,
0.1413526 , 0.08866125, -0.10355992, 0.25019792, -0.00393839,
0.5695221 , -0.12449711, 0.00210058, 0.07765691, 0.3281926 ,
-0.08014766, 0.20689923, -0.22970992, 0.07591247, -0.15799475,
-0.22765721, -0.1927638 , -0.15355097, 0.09073654, -0.04624737,
-0.01643844, -0.8464762 , 0.08931787, 0.5332598 , 0.2911471 ,
0.3791839 , -0.303577 , -0.22905344, -0.21888404, -0.1810556 ,
-0.019493 , 0.1367392 , 0.2187451 , 0.04935849, 0.14806354,
-0.00551599, -0.05861316, 0.26915333, -0.3377117 , 0.00114926,
-0.08245742, 0.4929164 , 0.06329145, 0.88905925, 0.25238925,
-0.33230686, 0.19560733, 0.4172665 , 0.02552557, 0.26976195,
0.26783204, -0.25621846, 0.1972084 , -0.3187281 , 0.04309576,
0.5668932 , -0.20068711, -0.55052537, 0.38765076, 0.4864744 ,
0.1130944 , -0.01685749, 0.2522309 , 0.35446006, -0.09084648,
0.24245648, 0.06625048, 0.32369784, -0.06834482, -0.43762162,
0.5748935 , -0.3647702 , 0.35806394, -0.1582715 , 0.0772159 ,
-0.16100545, 0.4267 , -0.1307025 , -0.03227446, 0.10494301,
-0.05289922, 0.7097728 , -0.17166416, -0.054304 , 0.12740278,
-0.14317441, -0.26385677, -0.22849232, 0.10305541, -0.04086439,
-0.46178675, -0.09216189, -0.76668286, -0.09225449, -0.2168125 ,
0.12703866, 0.05073327, 0.04017496, -0.24126993, 0.06369572,
-0.09168304, -0.40669888, 0.28603286, -0.04988515, 0.05513516,
0.7806739 , -0.02534869, 0.00973589, 0.33947662, 0.24699458,
0.42974108, -0.19158548, 0.07973159, 0.30443648, 0.01040802,
0.1698588 , 0.06705329, 0.03138978, -0.5304623 , 0.1360791 ,
0.24522378, 0.12584817, -0.14219321, 0.19166584, 0.2226152 ,
0.07703363, 0.23977087, 0.11122001, 0.1861035 , 0.37293455,
0.33047304, -0.00121733, 0.5897423 , -0.06080131, -0.05202375,
0.10317306, -0.04354465, -0.31621787, 0.35597408, 0.3476911 ,
0.19293919, -0.02774811, -0.2739977 , -0.08927495, 0.04805851,
-0.17200205, -0.7588404 , -0.0615377 , 0.22575249, -0.09135661,
0.05711236, 0.04755763, -0.22369057, -0.17867683, -0.5001432 ,
-0.50440204, 0.09518195, 0.21083611, 0.1761034 , -0.12429572,
-0.39988747, 0.61002195, 0.4314368 , 0.02579845, -0.28676844,
-0.03552085, 0.21395397, 0.075849 , -0.21982886, -0.39733076,
0.17018917, 0.0251913 , 0.14543247, 0.2262631 , -0.05759874,
-0.18585657, 0.3211592 , -0.272339 , -0.22289205, -0.31557533,
0.27803165, 0.22905917, -0.15953094, -0.10018265, -0.32501385,
0.3319722 , -0.5288052 , -0.18201298, 0.08485821, -0.3703766 ,
0.21344219, -0.13149662, 0.21560058, 0.08420809, 0.15680231,
0.22140822, 0.24218608, 0.40488818, -0.00483301, -0.34549713,
0.4376315 , 0.53999686, 0.5157788 , -0.14618067, 0.15518756,
-0.29715803, 0.3193897 , 0.08780982, -0.08137056, -0.01085411,
0.17661236, 0.03506121, -0.030442 , -0.04898388, 0.36918342,
-0.07420906, 0.24415983, -0.02784878, 0.11730439, 0.24948044],
dtype=float32))]
# Python 2 only: both lambdas rely on tuple-parameter unpacking.
# Splits every (url, document) pair into lowercase words, then pairs each word
# with whatever Loader(modelpath).map(word) returns for it.
inputInvertedIndexEmbedding = inputRawEmbed.flatMap(lambda (url, document): [(url, word) for word in document.lower().split()]).map(lambda (url, word): (word, Loader(modelpath).map(word)))
其中loader是:
import gensim
from gensim.models.fasttext import FastText as FT_gensim
import numpy as np
class Loader(object):
    """Lazily load a fastText model and map words to embedding vectors.

    All state lives on the class, so every ``Loader`` instance in the same
    Python process shares the loaded models and the vectors assigned to
    out-of-vocabulary words.
    """

    # Loaded models, keyed by model file name.
    cache = {}
    # Random vectors already assigned to out-of-vocabulary words.
    emb_dic = {}
    # Number of times a model has actually been loaded.
    count = 0

    def __init__(self, filename):
        """Remember the model path; nothing is loaded until first use."""
        self.fn = filename

    @property
    def fasttext(self):
        """Return the model for ``self.fn``, loading it on first access."""
        if self.fn not in Loader.cache:
            Loader.cache[self.fn] = FT_gensim.load_fasttext_format(self.fn)
            print ("==================================================================")
            Loader.count = Loader.count + 1
            print("**********************The class Loader is loded for the {} times ******************".format(Loader.count))
            print ("==================================================================")
        return Loader.cache[self.fn]

    def map(self, word):
        """Return the embedding vector for ``word``.

        A word known to the model gets the model's own vector. Any other word
        gets a 300-element vector drawn uniformly from [0, 1) the first time
        it is seen; that vector is stored in ``Loader.emb_dic`` and reused on
        later calls, so a word keeps the same embedding within the process.

        The result is a numpy array, which is unhashable: convert it (for
        example with ``tuple(...)``) before using it as a key or set member.
        """
        model = self.fasttext
        if word in model:
            return model[word]
        # Only draw a new vector for words not seen before; regenerating it
        # on every call would give the same word a different embedding.
        # NOTE(review): the cache is per process, so separate worker
        # processes would presumably draw different vectors for one word.
        if word not in Loader.emb_dic:
            Loader.emb_dic[word] = np.random.uniform(low=0.0, high=1.0, size=300)
        return Loader.emb_dic[word]
我最好的猜测是:PySpark 不知道如何处理输入的 numpy 数组,因为 numpy 使用自己的数据类型,而不是 Python 原生的 float/double——我在 DataFrame 中做类似操作时经常看到这个错误。如果改为返回
self.fasttext[word].tolist()
,并且在返回其他任何 numpy 类型之前都先把它转换为基本的 Python 类型,应该可以解决此错误。(注意:distinct() 需要对每个元素求哈希,而 list 与 numpy 数组一样不可哈希,因此在这里需要转换为 tuple。)

我通过以下方法解决了这个问题:
# Build (word, embedding) pairs from the (url, document) records.
# The embedding is wrapped in tuple() because numpy arrays are unhashable,
# which is what made distinct() fail in the traceback.
# Index-based lambdas replace tuple-parameter unpacking, which is a
# SyntaxError on Python 3; this form runs on both Python 2 and 3.
inputInvertedIndexEmbedding = (
    inputRawEmbed
    .flatMap(lambda rec: [(rec[0], word) for word in rec[1].lower().split()])
    .map(lambda pair: (pair[1], tuple(Loader(modelpath).map(pair[1]))))
)
评论:
看起来您正在尝试将数组用作字典键。只有数据转储而没有代码,很难判断问题所在。
@Mad Physicist 我已添加了代码。您能检查一下并指出我的错误在哪里吗?
可能与已有问题重复。
请将回溯格式化为代码,而不是引用。换行符对可读性很重要。
@Mad 我没法这样做,因为那样帖子就几乎全是代码了。