访问;html“U汤”;在python中从类外开始
我对使用类这件事很陌生 我试图调用html_soup变量,以便在类外使用它 我对类的定义如下:访问;html“U汤”;在python中从类外开始,python,class,beautifulsoup,Python,Class,Beautifulsoup,我对使用类这件事很陌生 我试图调用html_soup变量,以便在类外使用它 我对类的定义如下: from bs4 import BeautifulSoup from requests import get from urllib.request import Request, urlopen from selenium.webdriver.support import expected_conditions as ec from selenium.webdriver.chrome.option
from bs4 import BeautifulSoup
from requests import get
from urllib.request import Request, urlopen
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver
class DataScrape():
def __init__(self, website):
self.website = website
self.driver = webdriver.Chrome(
executable_path=r"C:/Users/littl/Downloads/"
+ "chromedriver_win32/chromedriver.exe")
# self.options = Options()
# self.options.add_argument("--headless")# Runs Chrome in headless mode.
# self.options.add_argument('--no-sandbox')# # Bypass OS security model
# self.options.add_argument('start-maximized')
# self.options.add_argument('disable-infobars')
# self.options.add_argument("--disable-extensions")
# self.driver = webdriver.Chrome(chrome_options=self.options,
# executable_path=
# r'C:/Users/littl/Downloads/chromedriver_win32/chromedriver.exe')
def bs4Scrape(self, urlToScrape):
req = Request(urlToScrape, headers={'User-Agent': 'Mozilla/5.0'})
web_byte = urlopen(req).read()
webpage = web_byte.decode('utf-8')
self.html_soup = BeautifulSoup(webpage, 'html5lib')
def seleniumNavigate(self):
self.driver.get(self.website)
imdbScrape = DataScrape("https://www.imdb.com/")
imdbScrape.seleniumNavigate()
wait = WebDriverWait(imdbScrape.driver, 10)
wait.until(ec.element_to_be_clickable((By.XPATH,
'//*[@id="suggestion-search"]'))).send_keys("the"
+ " fifth element")
firstSelection = wait.until(ec.element_to_be_clickable((By.XPATH,
'//*[@id="react-autowhatever-1--'
+ 'item-0"]/span')))
firstSelection.location_once_scrolled_into_view
firstSelection.click()
imdbUrlToScrape = imdbScrape.driver.current_url
imdbScrape.driver.close()
print(imdbScrape.html_soup)
imdbScrape.bs4Scrape(imdbUrlToScrape).html_soup
我试图这样称呼它:
from bs4 import BeautifulSoup
from requests import get
from urllib.request import Request, urlopen
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver
class DataScrape():
def __init__(self, website):
self.website = website
self.driver = webdriver.Chrome(
executable_path=r"C:/Users/littl/Downloads/"
+ "chromedriver_win32/chromedriver.exe")
# self.options = Options()
# self.options.add_argument("--headless")# Runs Chrome in headless mode.
# self.options.add_argument('--no-sandbox')# # Bypass OS security model
# self.options.add_argument('start-maximized')
# self.options.add_argument('disable-infobars')
# self.options.add_argument("--disable-extensions")
# self.driver = webdriver.Chrome(chrome_options=self.options,
# executable_path=
# r'C:/Users/littl/Downloads/chromedriver_win32/chromedriver.exe')
def bs4Scrape(self, urlToScrape):
req = Request(urlToScrape, headers={'User-Agent': 'Mozilla/5.0'})
web_byte = urlopen(req).read()
webpage = web_byte.decode('utf-8')
self.html_soup = BeautifulSoup(webpage, 'html5lib')
def seleniumNavigate(self):
self.driver.get(self.website)
imdbScrape = DataScrape("https://www.imdb.com/")
imdbScrape.seleniumNavigate()
wait = WebDriverWait(imdbScrape.driver, 10)
wait.until(ec.element_to_be_clickable((By.XPATH,
'//*[@id="suggestion-search"]'))).send_keys("the"
+ " fifth element")
firstSelection = wait.until(ec.element_to_be_clickable((By.XPATH,
'//*[@id="react-autowhatever-1--'
+ 'item-0"]/span')))
firstSelection.location_once_scrolled_into_view
firstSelection.click()
imdbUrlToScrape = imdbScrape.driver.current_url
imdbScrape.driver.close()
print(imdbScrape.html_soup)
imdbScrape.bs4Scrape(imdbUrlToScrape).html_soup
但在汇编时说:
NoneType' object has no attribute 'html_soup'
如何编写代码,以便在类外访问html_soup
我猜我必须用另一种方式定义变量,但不确定如何定义
如果您能提供任何帮助,我们将不胜感激。
html_-soup
根据您的代码,它不是该类的一个属性吗?老实说,我不确定这是我第一次使用该类系统,我尝试了一些事情,但不确定如何正确地使用它。imdbScrape.bs4Scrape(imdbUrlToScrape)。html_-soup
真是一团糟,因为您正在执行函数调用或方法调用(对返回None的函数)并尝试访问它在同一行中设置的属性。不要那样做。html\u-soup
是在类的实例上设置的。很难从您的代码中分辨出来,我也不是OOP方面的专家,但我目前认为不需要类。请尝试imdbScrape.html\u soup
。请注意,变量名和函数名应该跟在带有下划线的小写字母后面。@littlejiver DoimdbScrape.bs4Scrape(imdbUrlToScrape)
。注意,您已经在使用访问类实例上的某些内容的概念(在本例中,对于类DataScrape,实例/对象是imdbScrape)。然后分别执行imdbScrape.html\u soup
,因为html\u soup
是在实例上设置的。本质上,类没有什么特别之处,只是一些额外的语法。