Python 3.x soup.find无法在yahoo finance上找到文本_Python 3.x_Web Scraping_Beautifulsoup_Yahoo_Yahoo Finance - Fatal编程技术网

Python 3.x soup.find无法在yahoo finance上找到文本

python-3.x web-scraping

Python 3.x soup.find无法在yahoo finance上找到文本,python-3.x,web-scraping,beautifulsoup,yahoo,yahoo-finance,Python 3.x,Web Scraping,Beautifulsoup,Yahoo,Yahoo Finance,我收到以下错误：传递的“经营活动产生的现金流量总额”无效从该url： “这变得有点困难，因为中的“净收入”包含在一个强有力的标签中” 有人能给我解释一下为什么这个代码不能用于运营活动的总现金流，以及我如何确定某个东西有一个强标签代码：如果您检查链接返回的源（第69行）： url=“+statement+”？s=“+ticker_symbol+”&crumb=“+crumb 您将看到，它没有任何关于经营活动总现金流的有用信息。如果你把它改成 url=“+ticker\u symbol+”

我收到以下错误：

Invalid figure 'Total Cash Flow From Operating Activities' passed.（传入的指标“Total Cash Flow From Operating Activities”无效）

从该url：

“这变得有点困难，因为其中的“Net Income”（净收入）被包含在一个 <strong> 标签中”

有人能给我解释一下，为什么这段代码不适用于“Total Cash Flow From Operating Activities”（经营活动产生的现金流量总额），以及我该如何判断某个条目是否被包含在 <strong> 标签中？

代码：

如果您检查链接返回的源（第69行）：

url = "https://finance.yahoo.com/q/" + statement + "?s=" + ticker_symbol + "&crumb=" + crumb

您将看到，它没有任何关于经营活动总现金流的有用信息。如果你把它改成

url = "https://finance.yahoo.com/quote/" + ticker_symbol + "/cash-flow?p=" + ticker_symbol

它将返回您正在查找的信息：

[65824000000, 81266000000, 59713000000]

如果您检查链接返回的源（第69行）：

url = "https://finance.yahoo.com/q/" + statement + "?s=" + ticker_symbol + "&crumb=" + crumb

您将看到，它没有任何关于经营活动总现金流的有用信息。如果你把它改成

url = "https://finance.yahoo.com/quote/" + ticker_symbol + "/cash-flow?p=" + ticker_symbol

它将返回您正在查找的信息：

[65824000000, 81266000000, 59713000000]

Zroq，非常感谢你帮助我。根据您的建议，我能够从雅虎金融获得预期的结果。我能问一下你所说的检查是什么意思吗？只是简单地将url复制粘贴到web浏览器中吗？Zroq，非常感谢您的帮助。根据您的建议，我能够从雅虎金融获得预期的结果。我能问一下你所说的检查是什么意思吗？是否只是将url复制并粘贴到web浏览器中？

import re, requests
from bs4 import BeautifulSoup
import sys
"""
import os  #  file system operations
import re  # regular expressions
import pandas as pd # pandas... the best time series library out there
import datetime as dt # date and time functions
import io
"""
# search with regular expressions

# "CrumbStore":\{"crumb":"(?<crumb>[^"]+)"\}

def get_crumb():
    """Download a Yahoo Finance page and return the crumb token found in it.

    The AAPL history page is fetched, the value of the cookie named ``B`` and
    the extracted crumb are printed, and the crumb is returned.  The crumb is
    read from a fragment of the HTML that looks like
    ``"CrumbStore":{"crumb":"lQHxbbYOBCq"}``.

    Returns:
        The crumb string.  If several lines of the page contain a CrumbStore
        entry, the one from the last such line is returned.

    Raises:
        KeyError: if the response has no cookie named ``B``.
        RuntimeError: if no line of the page contains a CrumbStore entry.
    """
    url = 'https://uk.finance.yahoo.com/quote/AAPL/history'  # any quote page with a download link
    r = requests.get(url)
    txt = r.text

    cookie = r.cookies['B']  # the cookie we're looking for is named 'B'
    print('Cookie: ', cookie)

    # Raw string so that the escaped braces reach the regex engine untouched.
    pattern = re.compile(r'.*"CrumbStore":\{"crumb":"(?P<crumb>[^"]+)"\}')

    # Start from None so a page without a crumb is reported explicitly
    # instead of failing later with an unbound local variable.
    crumb = None
    for line in txt.splitlines():
        m = pattern.match(line)
        if m is not None:
            crumb = m.group('crumb')

    if crumb is None:
        raise RuntimeError('No "CrumbStore" entry found in the page at ' + url)

    print('Crumb=', crumb)
    return crumb

def periodic_figure_values(soup, yahoo_figure):
    """Return the per-period values of one figure from a parsed statement page.

    The figure's label is looked up first inside a ``<strong>`` element (the
    figures printed in bold) and then inside a plain ``<td>``.  Every other
    ``<td>`` of the row that holds the label is converted to an integer.

    Args:
        soup: parsed page; it must provide ``find`` and its rows must provide
            ``find_all`` (the callers in this file pass a ``BeautifulSoup``).
        yahoo_figure: label of the figure.  It is compiled as a regular
            expression to locate the label, and compared literally against
            cell text to skip a repeated label cell.

    Returns:
        A list of ints, one per value cell, each multiplied by 1000
        (presumably because the page reports thousands -- confirm).  Thousands
        separators are removed, ``(123)`` is read as ``-123`` and a lone
        ``-`` is read as 0.

    Raises:
        SystemExit: if the label is found in neither a ``<strong>`` nor a
            ``<td>`` element.
        ValueError: if a value cell does not hold an integer after cleaning.
    """
    pattern = re.compile(yahoo_figure)

    # ``string=`` is the current name of the keyword formerly spelled ``text=``.
    title = soup.find("strong", string=pattern)    # figures printed in bold
    if title:
        row = title.parent.parent
    else:
        title = soup.find("td", string=pattern)    # any other available figure
        if not title:
            sys.exit("Invalid figure '" + yahoo_figure + "' passed.")
        row = title.parent

    values = []
    for cell in row.find_all("td")[1:]:    # exclude the <td> with figure name
        cell_text = cell.text.strip()
        if cell_text == yahoo_figure:    # needed because some figures are indented
            continue
        number = cell_text.replace(",", "").replace("(", "-").replace(")", "")
        # A lone dash marks an empty figure; report it as zero.
        values.append(0 if number == "-" else int(number) * 1000)
    return values

def financials_soup(ticker_symbol, statement, quarterly=False):
    """Download a financial-statement page and return it parsed.

    Args:
        ticker_symbol: ticker placed in the ``s`` query parameter.
        statement: statement code; one of ``"is"``, ``"bs"`` or ``"cf"``.
        quarterly: when false (the default) ``&annual`` is appended to the URL.

    Returns:
        A ``BeautifulSoup`` built from the response text with the
        ``html.parser`` parser.

    Raises:
        SystemExit: if ``statement`` is not one of the accepted codes.
    """
    # Reject unknown codes before any network request is made.
    if statement not in ("is", "bs", "cf"):
        sys.exit("Invalid financial statement code '" + statement + "' passed.")

    token = get_crumb()
    pieces = ["https://finance.yahoo.com/q/", statement, "?s=", ticker_symbol, "&crumb=", token]
    if not quarterly:
        pieces.append("&annual")

    page = requests.get("".join(pieces))
    return BeautifulSoup(page.text, "html.parser")

# Demo call: fetch the AAPL page for statement code "cf" and print the values
# found for "Total Cash Flow From Operating Activities".  At this point
# financials_soup is the definition above; the redefinition further down has
# not been executed yet.
print(periodic_figure_values(financials_soup("AAPL", "cf"), "Total Cash Flow From Operating Activities"))

def financials_soup(ticker_symbol, statement, quarterly=False):
    """Fetch a statement page from the ``/quote/`` URL scheme and parse it.

    Args:
        ticker_symbol: ticker used both in the URL path and in the ``p``
            query parameter.
        statement: page name; one of ``"financials"``, ``"balance-sheet"``
            or ``"cash-flow"``.
        quarterly: when false (the default) ``&annual`` is appended to the URL.

    Returns:
        A ``BeautifulSoup`` built from the response text with the
        ``html.parser`` parser.

    Raises:
        SystemExit: if ``statement`` is not one of the accepted page names.
    """
    accepted = ("financials", "balance-sheet", "cash-flow")
    if statement not in accepted:
        # Unknown page name: stop with a message, before any request is made.
        sys.exit("Invalid financial statement code '" + statement + "' passed.")

    crumb = get_crumb()
    period_suffix = "" if quarterly else "&annual"
    url = (
        "https://finance.yahoo.com/quote/" + ticker_symbol
        + "/" + statement
        + "?p=" + ticker_symbol
        + "&crumb=" + crumb
        + period_suffix
    )
    html = requests.get(url).text
    return BeautifulSoup(html, "html.parser")

[web scraping]相关文章推荐

随机文章推荐