Python 不能'；找不到'的外部结构转换器；开罗.上下文'；_Python_Pyspark_Pycairo

Python 不能'；找不到'的外部结构转换器；开罗.上下文'；

python pyspark

Python 不能'；找不到'的外部结构转换器；开罗.上下文'；,python,pyspark,pycairo,Python,Pyspark,Pycairo,又是我。这是一段代码，与我正在做的一个项目有关，名为“Twitter数据情绪分析”。下面的代码基本上用于显示正面和负面推文的数量，其中我得到了下面给出的错误 from pyspark import SparkConf, SparkContext from pyspark.streaming import StreamingContext from pyspark.streaming.kafka import KafkaUtils import operator import numpy as n

又是我。这是一段代码，与我正在做的一个项目有关，名为“Twitter数据情绪分析”。下面的代码基本上用于显示正面和负面推文的数量，其中我得到了下面给出的错误

from pyspark import SparkConf, SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
import operator
import numpy as np
import matplotlib.pyplot as plt


def main():
        conf = SparkConf().setMaster("local[2]").setAppName("Streamer")
        sc = SparkContext(conf=conf)

        # Creating a streaming context with batch interval of 10 sec
        ssc = StreamingContext(sc, 10)
        ssc.checkpoint("checkpoint")
        pwords = load_wordlist("positive.txt")
        nwords = load_wordlist("negative.txt")
        counts = stream(ssc, pwords, nwords, 100)
        make_plot(counts)


def make_plot(counts):
        """
        This function plots the counts of positive and negative words for each timestep.
        """
        positiveCounts = []
        negativeCounts = []
        time = []

        for val in counts:
        positiveTuple = val[0]
        positiveCounts.append(positiveTuple[1])
        negativeTuple = val[1]
        negativeCounts.append(negativeTuple[1])

        for i in range(len(counts)):
        time.append(i)

        posLine = plt.plot(time, positiveCounts,'bo-', label='Positive')
        negLine = plt.plot(time, negativeCounts,'go-', label='Negative')
        plt.axis([0, len(counts), 0, max(max(positiveCounts), max(negativeCounts))+50])
        plt.xlabel('Time step')
        plt.ylabel('Word count')
        plt.legend(loc = 'upper left')
    plt.show()


def load_wordlist(filename):
    """ 
    This function returns a list or set of words from the given filename.
    """ 
    words = {}
    f = open(filename, 'rU')
    text = f.read()
    text = text.split('\n')
    for line in text:
        words[line] = 1
    f.close()
    return words


def wordSentiment(word,pwords,nwords):
    if word in pwords:
    return ('positive', 1)
    elif word in nwords:
    return ('negative', 1)


def updateFunction(newValues, runningCount):
    if runningCount is None:
       runningCount = 0
    return sum(newValues, runningCount) 


def sendRecord(record):
    connection = createNewConnection()
    connection.send(record)
    connection.close()


def stream(ssc, pwords, nwords, duration):
    kstream = KafkaUtils.createDirectStream(
    ssc, topics = ['twitterstream'], kafkaParams = {"metadata.broker.list": 'localhost:9092'})
    tweets = kstream.map(lambda x: x[1].encode("ascii", "ignore"))

    # Each element of tweets will be the text of a tweet.
    # We keep track of a running total counts and print it at every time step.
    words = tweets.flatMap(lambda line:line.split(" "))
    positive = words.map(lambda word: ('Positive', 1) if word in pwords else ('Positive', 0))
    negative = words.map(lambda word: ('Negative', 1) if word in nwords else ('Negative', 0))
    allSentiments = positive.union(negative)
    sentimentCounts = allSentiments.reduceByKey(lambda x,y: x+y)
    runningSentimentCounts = sentimentCounts.updateStateByKey(updateFunction)
    runningSentimentCounts.pprint()

    # The counts variable hold the word counts for all time steps
    counts = []
    sentimentCounts.foreachRDD(lambda t, rdd: counts.append(rdd.collect()))

    # Start the computation
    ssc.start() 
    ssc.awaitTerminationOrTimeout(duration)
    ssc.stop(stopGraceFully = True)

    return counts


if __name__=="__main__":
    main()

错误恰好如下所示：

Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_gtk3.py", line 343, in idle_draw
    self.draw()
  File "/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_gtk3.py", line 336, in draw
    self.get_property("window").process_updates (False)
TypeError: Couldn't find foreign struct converter for 'cairo.Context'

这里的问题是matplotlib的后端GTK3。我建议将其更改为只适合您的后端。看

我通常这样做

import matplotlib
matplotlib.use('Agg')

在任何与matplotlib相关的导入之前。您将无法看到该图形，但可以使用savefig（）保存它，然后使用图像查看器打开它。

查看此图。例如，请尝试：

sudo apt-get install python-gi-cairo

对于上面的评论，三年后（我使用WSL2和VSCode，X-server用于GUI应用程序），您应该使用

sudo apt install python3-gi-cairo

而且，您也省去了更改后端的麻烦。

非常感谢，这使meld重新开始工作（它发出了相同的错误消息）。如果您使用python3，您可能需要安装

python3 gi cairo

包。对于Fedora，请尝试

dnf安装python3 gobject