过滤Twitter流媒体API的地理位置-使用IPython和Mongo DB

过滤Twitter流媒体API的地理位置-使用IPython和Mongo DB,python,mongodb,twitter,geocode,twitter-streaming-api,Python,Mongodb,Twitter,Geocode,Twitter Streaming Api,我对编程还不熟悉,我试图了解Jupyter笔记本中的代码,以便将推特从特定位置传输到Mongo DB数据库。我做这件事有困难。有人能告诉我我是否使用了正确的地理编码来过滤推特流吗 多谢各位 我使用的完整代码如下: import numpy as np import pandas as pd import tweepy import time import math import os import sys from geopy import geocoders from tweepy impo

我对编程还不熟悉,我试图了解Jupyter笔记本中的代码,以便将推特从特定位置传输到Mongo DB数据库。我做这件事有困难。有人能告诉我我是否使用了正确的地理编码来过滤推特流吗

多谢各位

我使用的完整代码如下:

import numpy as np
import pandas as pd
import tweepy
import time
import math
import os
import sys
from geopy import geocoders

from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener


import matplotlib.pyplot as plt

import ipywidgets as wgt
from IPython.display import display
from sklearn.feature_extraction.text import CountVectorizer
import re
from datetime import datetime

%matplotlib inline

api_key = "*****" # <---- Add your API Key
api_secret = "****" # <---- Add your API Secret
access_token = "****" # <---- Add your access token
access_token_secret = "****" # <---- Add your access token secret

auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)

class listener(StreamListener):

def __init__(self, start_time, time_limit=60):

    self.time = start_time
     self.limit = time_limit
     self.tweet_data = []

def on_data(self, data):

saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')

     while (time.time() - self.time) < self.limit:

         try:
             self.tweet_data.append(data)

            return True

        except BaseException as e:
            print ('failed ondata,', str(e))
            time.sleep(5)
            pass

    saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
    saveFile.write(u'[\n')
    saveFile.write(','.join(self.tweet_data))
    saveFile.write(u'\n]')
    saveFile.close()
    exit()

def on_error(self, status):

    print (statuses)


API = tweepy.API(auth)
API.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1)


import pymongo
from pymongo import MongoClient
import json


start_time = time.time() #grabs the system time

twitterStream = Stream(auth, StreamListener)

myStreamListener = StreamListener#(max_tweets=1000)
myStream = tweepy.Stream(auth = API.auth, listener=myStreamListener)

myStream.filter(track=['API.reverse_geocode'], async=True)

class listener(StreamListener):

counter = 0

def __init__(self, max_tweets=1000, *args, **kwargs):
    self.max_tweets = max_tweets
    self.counter = 0
    super().__init__(*args, **kwargs)

def on_connect(self):
    self.counter = 0
    self.start_time = datetime.now()

def on_status(self, status):
    # Increment counter
    self.counter += 1
    collection.insert_many


    if self.counter % 1 == 0:
        value = int(100.00 * self.counter / self.max_tweets)
        mining_time = datetime.now() - self.start_time
        progress_bar.value = value
        html_value = """<span class="label label-primary">Tweets/Sec: %.1f</span>""" % (self.counter / max([1,mining_time.seconds]))
        html_value += """ <span class="label label-success">Progress: %.1f%%</span>""" % (self.counter / self.max_tweets * 100.0)
        html_value += """ <span class="label label-info">ETA: %.1f Sec</span>""" % ((self.max_tweets - self.counter) / (self.counter / max([1,mining_time.seconds])))
        wgt_status.value = html_value

        if self.counter >= self.max_tweets:
            myStream.disconnect()
            print("Finished")
            print("Total Mining Time: %s" % (mining_time))
            print("Tweets/Sec: %.1f" % (self.max_tweets / mining_time.seconds))
            progress_bar.value = 0

            try:
                client = pymongo.MongoClient('localhost', 27017)
                db = client['happycitydb']
                collection = db['happycitytweets_collection']
                tweet = json.loads(data)
                collection.insert(tweet)

                return True
            except BaseException as e:
                print ('failed ondata,', str(e))
                time.sleep(5)
                pass
            exit()


keywords = ["happy"]

progress_bar = wgt.IntProgress(value=0)
display(progress_bar)
wgt_status = wgt.HTML(value="""<span class="label label primary">Tweets/Sec: 0.0</span>""")
display(wgt_status)

for error_counter in range(5):
try:
    myStream.filter(track=keywords)
    print("Tweets collected: %s" % myStream.listener.counter)
    print("Total tweets in collection: %s" % col.count())
    break
except:
    print("ERROR# %s" % (error_counter + 1))
将numpy导入为np
作为pd进口熊猫
进口粗花呢
导入时间
输入数学
导入操作系统
导入系统
从geopy导入地理编码器
从tweepy导入流
从tweepy导入OAuthHandler
从tweepy.streaming导入StreamListener
将matplotlib.pyplot作为plt导入
将ipywidgets作为wgt导入
从IPython.display导入显示
从sklearn.feature\u extraction.text导入countvectorier
进口稀土
从日期时间导入日期时间
%matplotlib内联
api_key=“*****”#=self.max_推文:
myStream.disconnect()
打印(“完成”)
打印(“总挖掘时间:%s”%(挖掘时间))
打印(“Tweets/Sec:%.1f”%(self.max_Tweets/mining_time.seconds))
进度条值=0
尝试:
client=pymongo.MongoClient('localhost',27017)
db=client['happycitydb']
collection=db['happycitytweets\u collection']
tweet=json.loads(数据)
收藏.插入(推特)
返回真值
除BaseException作为e外:
打印('ondata'失败,'str(e))
时间。睡眠(5)
通过
退出()
关键词=[“快乐”]
进度条=wgt.IntProgress(值=0)
显示(进度条)
wgt_status=wgt.HTML(值=“推特/秒:0.0”)
显示(wgt_状态)
对于范围(5)内的错误计数器:
尝试:
myStream.filter(track=关键字)
打印(“收集的推文:%s”%myStream.listener.counter)
打印(“集合中的推文总数:%s”%col.count()
打破
除:
打印(“错误#%s”%(错误#计数器+1))