过滤Twitter流媒体API的地理位置-使用IPython和Mongo DB
我对编程还不熟悉,我试图了解Jupyter笔记本中的代码,以便将推特从特定位置传输到Mongo DB数据库。我做这件事有困难。有人能告诉我我是否使用了正确的地理编码来过滤推特流吗 多谢各位 我使用的完整代码如下:过滤Twitter流媒体API的地理位置-使用IPython和Mongo DB,python,mongodb,twitter,geocode,twitter-streaming-api,Python,Mongodb,Twitter,Geocode,Twitter Streaming Api,我对编程还不熟悉,我试图了解Jupyter笔记本中的代码,以便将推特从特定位置传输到Mongo DB数据库。我做这件事有困难。有人能告诉我我是否使用了正确的地理编码来过滤推特流吗 多谢各位 我使用的完整代码如下: import numpy as np import pandas as pd import tweepy import time import math import os import sys from geopy import geocoders from tweepy impo
import numpy as np
import pandas as pd
import tweepy
import time
import math
import os
import sys
from geopy import geocoders
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import matplotlib.pyplot as plt
import ipywidgets as wgt
from IPython.display import display
from sklearn.feature_extraction.text import CountVectorizer
import re
from datetime import datetime
%matplotlib inline
api_key = "*****" # <---- Add your API Key
api_secret = "****" # <---- Add your API Secret
access_token = "****" # <---- Add your access token
access_token_secret = "****" # <---- Add your access token secret
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)
class listener(StreamListener):
def __init__(self, start_time, time_limit=60):
self.time = start_time
self.limit = time_limit
self.tweet_data = []
def on_data(self, data):
saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
while (time.time() - self.time) < self.limit:
try:
self.tweet_data.append(data)
return True
except BaseException as e:
print ('failed ondata,', str(e))
time.sleep(5)
pass
saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
saveFile.write(u'[\n')
saveFile.write(','.join(self.tweet_data))
saveFile.write(u'\n]')
saveFile.close()
exit()
def on_error(self, status):
print (statuses)
API = tweepy.API(auth)
API.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1)
import pymongo
from pymongo import MongoClient
import json
start_time = time.time() #grabs the system time
twitterStream = Stream(auth, StreamListener)
myStreamListener = StreamListener#(max_tweets=1000)
myStream = tweepy.Stream(auth = API.auth, listener=myStreamListener)
myStream.filter(track=['API.reverse_geocode'], async=True)
class listener(StreamListener):
counter = 0
def __init__(self, max_tweets=1000, *args, **kwargs):
self.max_tweets = max_tweets
self.counter = 0
super().__init__(*args, **kwargs)
def on_connect(self):
self.counter = 0
self.start_time = datetime.now()
def on_status(self, status):
# Increment counter
self.counter += 1
collection.insert_many
if self.counter % 1 == 0:
value = int(100.00 * self.counter / self.max_tweets)
mining_time = datetime.now() - self.start_time
progress_bar.value = value
html_value = """<span class="label label-primary">Tweets/Sec: %.1f</span>""" % (self.counter / max([1,mining_time.seconds]))
html_value += """ <span class="label label-success">Progress: %.1f%%</span>""" % (self.counter / self.max_tweets * 100.0)
html_value += """ <span class="label label-info">ETA: %.1f Sec</span>""" % ((self.max_tweets - self.counter) / (self.counter / max([1,mining_time.seconds])))
wgt_status.value = html_value
if self.counter >= self.max_tweets:
myStream.disconnect()
print("Finished")
print("Total Mining Time: %s" % (mining_time))
print("Tweets/Sec: %.1f" % (self.max_tweets / mining_time.seconds))
progress_bar.value = 0
try:
client = pymongo.MongoClient('localhost', 27017)
db = client['happycitydb']
collection = db['happycitytweets_collection']
tweet = json.loads(data)
collection.insert(tweet)
return True
except BaseException as e:
print ('failed ondata,', str(e))
time.sleep(5)
pass
exit()
keywords = ["happy"]
progress_bar = wgt.IntProgress(value=0)
display(progress_bar)
wgt_status = wgt.HTML(value="""<span class="label label primary">Tweets/Sec: 0.0</span>""")
display(wgt_status)
for error_counter in range(5):
try:
myStream.filter(track=keywords)
print("Tweets collected: %s" % myStream.listener.counter)
print("Total tweets in collection: %s" % col.count())
break
except:
print("ERROR# %s" % (error_counter + 1))
将numpy导入为np
作为pd进口熊猫
进口粗花呢
导入时间
输入数学
导入操作系统
导入系统
从geopy导入地理编码器
从tweepy导入流
从tweepy导入OAuthHandler
从tweepy.streaming导入StreamListener
将matplotlib.pyplot作为plt导入
将ipywidgets作为wgt导入
从IPython.display导入显示
从sklearn.feature\u extraction.text导入countvectorier
进口稀土
从日期时间导入日期时间
%matplotlib内联
api_key=“*****”#=self.max_推文:
myStream.disconnect()
打印(“完成”)
打印(“总挖掘时间:%s”%(挖掘时间))
打印(“Tweets/Sec:%.1f”%(self.max_Tweets/mining_time.seconds))
进度条值=0
尝试:
client=pymongo.MongoClient('localhost',27017)
db=client['happycitydb']
collection=db['happycitytweets\u collection']
tweet=json.loads(数据)
收藏.插入(推特)
返回真值
除BaseException作为e外:
打印('ondata'失败,'str(e))
时间。睡眠(5)
通过
退出()
关键词=[“快乐”]
进度条=wgt.IntProgress(值=0)
显示(进度条)
wgt_status=wgt.HTML(值=“推特/秒:0.0”)
显示(wgt_状态)
对于范围(5)内的错误计数器:
尝试:
myStream.filter(track=关键字)
打印(“收集的推文:%s”%myStream.listener.counter)
打印(“集合中的推文总数:%s”%col.count()
打破
除:
打印(“错误#%s”%(错误#计数器+1))