Filter 过滤tweepy.StreamListener中的tweets on_data方法_Filter_Tweepy_Tweets

Filter 过滤tweepy.StreamListener中的tweets on_data方法

filter

Filter 过滤tweepy.StreamListener中的tweets on_data方法,filter,tweepy,tweets,Filter,Tweepy,Tweets,从许多关于堆栈溢出的文章中了解到，tweepy.streaming.stream类中的筛选器方法使用逻辑OR fortrack和location参数因此，下面将返回location=USA或带有单词“”的推文这个解决方案( )在on_status方法中检查关键字效果很好，但是如果我需要存储整个json变量，我想我必须使用on_data 因此更改了on_数据（如下面的代码所示），但得到一个错误： File "/Library/Python/2.7/site-packages/tweepy/

从许多关于堆栈溢出的文章中了解到，tweepy.streaming.stream类中的筛选器方法使用逻辑OR fortrack和location参数

因此，下面将返回location=USA或带有单词“”的推文

这个解决方案( )在on_status方法中检查关键字效果很好，但是如果我需要存储整个json变量，我想我必须使用on_data

因此更改了on_数据（如下面的代码所示），但得到一个错误：

  File "/Library/Python/2.7/site-packages/tweepy/streaming.py", line 294, in _run
    raise exception
KeyError: 'text'

--编码：utf-8--

  File "/Library/Python/2.7/site-packages/tweepy/streaming.py", line 294, in _run
    raise exception
KeyError: 'text'

from types import *
import tweepy
import json
import argparse
import io

class EchoStreamListener(tweepy.StreamListener):
    def __init__(self, api, dump_json=False, numtweets=0):
        self.api = api
        self.dump_json = dump_json
        self.count = 0
        self.limit = int(numtweets)

        super(tweepy.StreamListener, self).__init__()

# def on_status(self, status):
#     if any(keyWord in status.text.lower() for keyWord in keyWordList):
#         print status.text
#
#         self.count+=1
#         return False if self.count == self.limit else True
#     else:
#         return True # Don't kill the stream

    def on_data(self, tweet):
        tweet_data = json.loads(tweet)  # This allows the JSON data be used as a normal dictionary:

        if any(keyWord in tweet_data['text'] for keyWord in keyWordList):
            if self.dump_json:
                print json.dumps(tweet_data)
                saveFile.write(unicode(tweet) + "\n")

                self.count+=1
                return False if self.count == self.limit else True
            else:
                print tweet_data['created_at','name','text'].encode("utf-8").rstrip()

    def on_error(self, status_code):
        print >> sys.stderr, 'Encountered error with status code:', status_code
        return True 

def get_parser():
    parser = argparse.ArgumentParser(add_help=True)
    group = parser.add_mutually_exclusive_group(required=True)

    group.add_argument(
        '-j', '--json',
        action='store_true',
        help='dump each tweet as a json string'
    )
    group.add_argument(
        '-t', '--text',
        dest='json',
        action='store_false',
        help='dump each tweet\'s text'
    )
    parser.add_argument(
        '-n', '--numtweets',
        metavar='numtweets',
        help='set number of tweets to retrieve'
    )
    return parser

if __name__ == '__main__':

    oauthObject = tweepy.OAuthHandler(myconsumer_key, myconsumer_secret)
    oauthObject.set_access_token(myaccess_key,myaccess_secret)

    apiInstance = tweepy.API(oauthObject) 
    parser = get_parser()
    args = parser.parse_args()

    streamObj = tweepy.streaming.Stream(oauthObject
                                       ,EchoStreamListener(api=apiInstance,
                                                           dump_json=args.json,
                                                       numtweets=args.numtweets))

    keyWordList = ['panthers','falcon']
    GEOBOX_USA = [-125,25.1,-60.5,49.1]
    saveFile = io.open('/Users/deepaktanna/raw_tweets.json', 'w', encoding='utf-8')

    streamObj.filter(locations=GEOBOX_USA, languages=['en'])

    saveFile.close()