Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/297.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用python从twitter的推文中提取数据_Python_Twitter_Extraction - Fatal编程技术网

使用python从twitter的推文中提取数据

使用python从twitter的推文中提取数据,python,twitter,extraction,Python,Twitter,Extraction,我想提取tweet id、twitter用户名、tweet中显示fb.me链接的用户的twitter id以及他的fb id和fb用户名等数据 我必须为200条这样的推文做这件事 我的代码: from twitter.oauth import OAuth import json import urllib2 from twitter import * ckey = '' csecret = '' atoken = '' asecret = '' auth = OAuth(atoken

我想提取tweet id、twitter用户名、tweet中显示fb.me链接的用户的twitter id以及他的fb id和fb用户名等数据

我必须为200条这样的推文做这件事

我的代码:

from twitter.oauth import OAuth
import json
import urllib2
from twitter import *

ckey = ''
csecret = '' 
atoken = '' 
asecret = ''



auth = OAuth(atoken,asecret,ckey,csecret)

t_api = Twitter(auth=auth)

search = t_api.search.tweets(q='http://on.fb.me',count=1)

print search

print 'specific data'

#print search['statuses'][0]['entities']['urls']
现在正在检索1个结果,并希望提取上述数据

结果我得到:

{u'search_metadata': {u'count': 1, u'completed_in': 0.021, u'max_id_str': u'542227367834685440', u'since_id_str': u'0', u'next_results': u'?max_id=542227367834685439&q=http%3A%2F%2Fon.fb.me&count=1&include_entities=1', u'refresh_url': u'?since_id=542227367834685440&q=http%3A%2F%2Fon.fb.me&include_entities=1', u'since_id': 0, u'query': u'http%3A%2F%2Fon.fb.me', u'max_id': 542227367834685440L}, u'statuses': [{u'contributors': None, u'truncated': False, u'text': u'Check out Monday Morning Cooking Club Cooking Tip Day #1 --&gt;http://t.co/j6mbg1OE6Z | http://t.co/c7qjunLQz2', u'in_reply_to_status_id': None, u'id': 542227367834685440L, u'favorite_count': 0, u'source': u'<a href="http://www.hootsuite.com" rel="nofollow">Hootsuite</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'http://t.co/j6mbg1OE6Z', u'indices': [63, 85], u'expanded_url': u'http://on.fb.me/', u'display_url': u'on.fb.me'}, {u'url': u'http://t.co/c7qjunLQz2', u'indices': [88, 110], u'expanded_url': u'http://bit.ly/12BbG16', u'display_url': u'bit.ly/12BbG16'}]}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 0, u'id_str': u'542227367834685440', u'favorited': False, u'user': {u'follow_request_sent': False, u'profile_use_background_image': True, u'profile_text_color': u'333333', u'default_profile_image': False, u'id': 226140415, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/704964581/bc37b358019be05efe1094a0d100ea53.jpeg', u'verified': False, u'profile_location': None, u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/469488950050955264/FOoWjIEZ_normal.jpeg', u'profile_sidebar_fill_color': u'DDEEF6', u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/sida0E6eXy', u'indices': [0, 22], u'expanded_url': u'http://www.mondaymorningcookingclub.com.au', u'display_url': u'mondaymorningcookingclub.com.au'}]}, u'description': 
{u'urls': []}}, u'followers_count': 1574, u'profile_sidebar_border_color': u'000000', u'id_str': u'226140415', u'profile_background_color': u'EDCDC7', u'listed_count': 50, u'is_translation_enabled': False, u'utc_offset': 39600, u'statuses_count': 12594, u'description': u"Monday Morning Cooking Club. A bunch of Sydney gals sharing and preserving the wonderful recipes of Australia's culturally diverse Jewish community.", u'friends_count': 1904, u'location': u'Sydney,  Australia', u'profile_link_color': u'C40A38', u'profile_image_url': u'http://pbs.twimg.com/profile_images/469488950050955264/FOoWjIEZ_normal.jpeg', u'following': False, u'geo_enabled': True, u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/226140415/1400769931', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/704964581/bc37b358019be05efe1094a0d100ea53.jpeg', u'name': u'Lisa Goldberg', u'lang': u'en', u'profile_background_tile': False, u'favourites_count': 1309, u'screen_name': u'MondayMorningCC', u'notifications': False, u'url': u'http://t.co/sida0E6eXy', u'created_at': u'Mon Dec 13 12:22:13 +0000 2010', u'contributors_enabled': False, u'time_zone': u'Sydney', u'protected': False, u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'lang': u'en', u'created_at': u'Tue Dec 09 08:00:53 +0000 2014', u'in_reply_to_status_id_str': None, u'place': None, u'metadata': {u'iso_language_code': u'en', u'result_type': u'recent'}}]}
（以上 JSON 结果的中文转写版本已损坏，此处省略；完整、可读的原始结果见上文。）
(The machine-translated rendering of the JSON result was corrupted beyond repair and has been omitted; see the intact original result above.)

您能帮我找到如何检索这些特定数据吗?

您可以执行类似的操作来发出查询,然后通过使用相应的键进行查询来获取所需的数据

import json
import urllib2
import twitter

ckey = 'Your consumer key'
csecret = 'your consumer secret' 
atoken = 'your token' 
asecret = 'your secret token'

auth = twitter.oauth.OAuth(atoken, asecret,
                           ckey, csecret)

twitter_api = twitter.Twitter(auth=auth)

q = 'http://on.fb.me'

count = 100

search_results = twitter_api.search.tweets(q=q, count=count)

statuses = search_results['statuses']

# Iterate through 5 more batches of results by following the cursor

for _ in range(5):
    print "Length of statuses", len(statuses)
    try:
        next_results = search_results['search_metadata']['next_results']
    except KeyError, e: # No more results when next_results doesn't exist
        break

    # Create a dictionary from next_results, which has the following form:
    # ?max_id=313519052523986943&q=NCAA&include_entities=1
    kwargs = dict([ kv.split('=') for kv in next_results[1:].split("&") ])

    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

# Show one sample search result by slicing the list...
print json.dumps(statuses[0], indent=1)

# get relevant data into lists
user_names = [ user_mention['name'] 
                 for status in statuses
                     for user_mention in status['entities']['user_mentions'] ]

screen_names = [ user_mention['screen_name'] 
                 for status in statuses
                     for user_mention in status['entities']['user_mentions'] ]

id_str = [ user_mention['id_str'] 
                 for status in statuses
                     for user_mention in status['entities']['user_mentions'] ]

t_id = [ status['id'] 
         for status in statuses ]

# print out first 5 results
print json.dumps(screen_names[0:5], indent=1) 
print json.dumps(user_names[0:5], indent=1)
print json.dumps(id_str[0:5], indent=1)
print json.dumps(t_id[0:5], indent=1)
结果:

[
 "DijalogNet", 
 "Kihot_ex_of", 
 "Kihot_ex_of", 
 "JAsunshine1011", 
 "RobertCornegyJr"
]
[
 "Dijalog Net", 
 "Sa\u0161a Jankovi\u0107", 
 "Sa\u0161a Jankovi\u0107", 
 "Raycent Edwards", 
 "Robert E Cornegy, Jr"
]
[
 "2380692464", 
 "563692937", 
 "563692937", 
 "15920807", 
 "460051837"
]
[
 542309722385580032, 
 542227367834685440, 
 542202885514461185, 
 542201843448045568, 
 542188061598437376
]

查看更多关于如何使用api的示例。

感谢您宝贵的帖子。我已经完成了这部分。我怀疑您正在检索的两个id,根据我的说法,第二个id是tweet id,twitter id应该是['user']['id']。但是你能告诉我如何检索twitter用户对应的facebook用户id和facebook用户名吗?因为这是我最关心的问题。我认为推特用户的fb帐户不清楚。你唯一能做的就是,检查推特本身的fb链接,然后使用facebook api来确定发布该帖子的用户(但是,这不一定是原始的推特用户)。我不熟悉facebook api,因此我无法说出潜在的限制。您能否指定您检索到的两个id的类型,即用户id和推特id。另外,请检查我在问题中发布的结果中的用户数组?