使用 Python 和 BeautifulSoup 编写网页爬虫(标签:python, beautifulsoup)
我的任务是用 BeautifulSoup 编写一个网页爬虫(web scraper),代码中已经定义好了若干函数。由于 bs4.element.ResultSet 在传递时相当于一个 Python 列表,我该如何把它传给另一个函数,并以字典的形式提取其中的相关数据?下面是我目前写出的代码,更多信息见代码中的注释:
def fetchWebsiteData(url_website):
    """Fetch a page and return the ``<tbody>`` elements found in it.

    Parameters
    ----------
    url_website : str
        URL of a website

    Returns
    -------
    bs4.element.ResultSet
        Every ``<tbody>`` element of the parsed page. Each item is a whole
        table body, not an individual row, so callers still have to search
        inside it for rows or cells.
    """
    req = requests.get(url_website)
    soup = BeautifulSoup(req.text, 'html.parser')
    return soup.find_all('tbody')
我用到的另一个函数:
def fetchVaccineDoses(web_page_data):
    """Fetch the vaccine doses available in the page data as menu options.

    Parameters
    ----------
    web_page_data : bs4.element.ResultSet
        Iterable of parsed elements (the ``<tbody>`` sections of the page);
        each one is searched for ``td`` cells with class ``dose_num``.

    Returns
    -------
    dict
        Dictionary with the doses available and options to select, with key
        as 'Option' and value as 'Command'. Only cells whose text is a plain
        number contribute, and options are ordered by dose number.

    Example
    -------
    >>> url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
    >>> web_page_data = fetchWebsiteData(url_website)
    >>> print(fetchVaccineDoses(web_page_data))
    {'1': 'Dose 1', '2': 'Dose 2'}
    """
    found = set()
    for table_body in web_page_data:
        for cell in table_body.find_all('td', class_="dose_num"):
            # Work on the cell's text: comparing the text node itself with
            # the integer 1 is never true, which filed every cell under '2'.
            label = cell.get_text(strip=True)
            if label.isdecimal():
                found.add(label)
    return {label: "Dose " + label for label in sorted(found, key=int)}
以及:
PS:我刚接触编程,还在学习阶段,代码写得不好,请多包涵。
PPS:于是,我稍微修改了你的代码:
- 示例 01:不是提问者想要的结果(无输出)
- 示例 02:按提问者的需求定制(附输出)
- 示例 03:我自己完成这项任务的做法(附输出)
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchVaccineDoses(web_page_data):
    """Label every ``td.dose_num`` cell of the page with a running index.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.

    Returns
    -------
    dict
        Maps the 0-based position of each cell to ``"Dose " + cell text``.
        Every matching cell is included, so a header cell carrying the same
        class shows up as an entry too.
    """
    cells = web_page_data.find_all('td', class_="dose_num")
    return {index: "Dose " + cell.text for index, cell in enumerate(cells)}
####Changes here too - Complete your empty function with code -
####I hope it is your output what you want to see.
def fetchAgeGroup(web_page_data, doses):
    """Append the text of each ``td.age`` cell to the matching dose label.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.
    doses : dict
        Labels keyed by 0-based cell position; updated in place.

    Returns
    -------
    dict
        The same ``doses`` object, each label extended with
        ``" | Age " + cell text``.

    Raises
    ------
    KeyError
        If the page has more ``td.age`` cells than ``doses`` has entries.
    """
    for index, cell in enumerate(web_page_data.find_all('td', class_="age")):
        doses[index] += " | Age " + cell.text
    return doses
# Demo driver: download the mock page, label every dose cell, then append
# the age text to each label and print the result.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
VaccineDoses = fetchVaccineDoses(web_page_data)
# fetchAgeGroup receives the dict built above; its result is kept as Ages.
Ages = fetchAgeGroup(web_page_data, VaccineDoses)
# Entry 0 holds the table header text rather than a data row, so it is
# dropped before printing.
del Ages[0]
print(Ages)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 2
<<< Dose Selected: 2
>>> Select the Age Group:
0 :- 45+ [Amount: 29]
1 :- 18+ [Amount: 13]
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchAllPossibleData(web_page_data):
    """Collect every data row of the page plus a per-dose row count.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page; the first ``<tr>`` is skipped as the table header.

    Returns
    -------
    dict
        One entry per data row, keyed by its 1-based row number, each a dict
        with the keys "Hospital", "State", "District", "Vaccine", "Dose" and
        "Age". The extra key "AmountDose" maps each dose text to the number
        of rows carrying it.

    Raises
    ------
    AttributeError
        If a data row lacks one of the expected ``td`` cells.
    """
    # Output key -> CSS class of the <td> that holds the value.
    columns = {
        "Hospital": "hospital_name",
        "State": "state_name",
        "District": "district_name",
        "Vaccine": "vaccine_name",
        "Dose": "dose_num",
        "Age": "age",
    }
    records = {}
    dose_totals = {}
    for row_index, row in enumerate(web_page_data.find_all('tr')):
        if row_index == 0:
            # Table header: carries no data.
            continue
        record = {
            key: row.find("td", class_=css_class).text
            for key, css_class in columns.items()
        }
        records[row_index] = record
        dose_totals[record["Dose"]] = dose_totals.get(record["Dose"], 0) + 1
    records["AmountDose"] = dose_totals
    return records
def fetchOnlyNumber(inputStr):
    """Parse a string made only of decimal digits into an integer.

    Parameters
    ----------
    inputStr : str
        Text to parse; no sign, spaces or other characters are accepted.

    Returns
    -------
    int
        The parsed value, or -1 when ``inputStr`` is empty or holds anything
        other than decimal digits.
    """
    # isdecimal() is False for "" and for digit-like characters such as
    # superscripts that int() cannot convert, so both map to -1 instead of
    # being read as 0 or raising ValueError.
    if not inputStr.isdecimal():
        return -1
    return int(inputStr)
def fetchUserInput():
    """Prompt until the user types a whole number, then return it.

    Returns
    -------
    int
        The number entered; callers treat 0 as the "exit" choice.
    """
    while True:
        print("Choose: ", end="")
        usrInput = fetchOnlyNumber(input())
        # fetchOnlyNumber reports unparsable text with a negative value.
        if usrInput >= 0:
            return usrInput
        print("Wrong input! Only numbers are allowed!")
# Interactive driver: list the doses found on the page, ask the user for one,
# then print every hospital row offering that dose.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
whole_data = fetchAllPossibleData(web_page_data)

print(">>> Select the Dose of Vaccination:")
for dose_index, (dose, amount) in enumerate(whole_data["AmountDose"].items(), start=1):
    print("{0} :- Dose {1} [Amount: {2}]".format(dose_index, dose, amount))
print("0 :- Exit")

doseNum = fetchUserInput()
if doseNum == 0:
    # Raise SystemExit directly: the bare exit() helper is installed by the
    # site module and is not guaranteed to be available.
    raise SystemExit(0)
print("<<< Dose Selected: {0}".format(doseNum))

print(">>> Select the Age Group:")
# NOTE(review): the typed number is compared with the dose value itself, not
# with the menu position printed above; confirm the two always coincide.
for key, record in whole_data.items():
    if key == "AmountDose":
        # Summary entry, not a hospital row.
        continue
    if int(record["Dose"]) == doseNum:
        print("Hospital: {0} | Vaccine: {1} | Dose: {2} | Age: {3}".format(
            record["Hospital"],
            record["Vaccine"],
            record["Dose"],
            record["Age"]))
        print('-' * 100)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 1
<<< Dose Selected: 1
>>> Select the Age Group:
Hospital: Apollo Hospital | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Springedge Care | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: West Valley Medical Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Zenlife Clinic | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Family Wellness Center | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Tranquil Valley Hospital Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: SevenHills | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
示例02:
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchVaccineDoses(web_page_data):
    """Label every ``td.dose_num`` cell of the page with a running index.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.

    Returns
    -------
    dict
        Maps the 0-based position of each cell to ``"Dose " + cell text``.
        Every matching cell is included, so a header cell carrying the same
        class shows up as an entry too.
    """
    cells = web_page_data.find_all('td', class_="dose_num")
    return {index: "Dose " + cell.text for index, cell in enumerate(cells)}
####Changes here too - Complete your empty function with code -
####I hope it is your output what you want to see.
def fetchAgeGroup(web_page_data, doses):
    """Append the text of each ``td.age`` cell to the matching dose label.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.
    doses : dict
        Labels keyed by 0-based cell position; updated in place.

    Returns
    -------
    dict
        The same ``doses`` object, each label extended with
        ``" | Age " + cell text``.

    Raises
    ------
    KeyError
        If the page has more ``td.age`` cells than ``doses`` has entries.
    """
    for index, cell in enumerate(web_page_data.find_all('td', class_="age")):
        doses[index] += " | Age " + cell.text
    return doses
# Demo driver: download the mock page, label every dose cell, then append
# the age text to each label and print the result.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
VaccineDoses = fetchVaccineDoses(web_page_data)
# fetchAgeGroup receives the dict built above; its result is kept as Ages.
Ages = fetchAgeGroup(web_page_data, VaccineDoses)
# Entry 0 holds the table header text rather than a data row, so it is
# dropped before printing.
del Ages[0]
print(Ages)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 2
<<< Dose Selected: 2
>>> Select the Age Group:
0 :- 45+ [Amount: 29]
1 :- 18+ [Amount: 13]
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchAllPossibleData(web_page_data):
    """Collect every data row of the page plus a per-dose row count.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page; the first ``<tr>`` is skipped as the table header.

    Returns
    -------
    dict
        One entry per data row, keyed by its 1-based row number, each a dict
        with the keys "Hospital", "State", "District", "Vaccine", "Dose" and
        "Age". The extra key "AmountDose" maps each dose text to the number
        of rows carrying it.

    Raises
    ------
    AttributeError
        If a data row lacks one of the expected ``td`` cells.
    """
    # Output key -> CSS class of the <td> that holds the value.
    columns = {
        "Hospital": "hospital_name",
        "State": "state_name",
        "District": "district_name",
        "Vaccine": "vaccine_name",
        "Dose": "dose_num",
        "Age": "age",
    }
    records = {}
    dose_totals = {}
    for row_index, row in enumerate(web_page_data.find_all('tr')):
        if row_index == 0:
            # Table header: carries no data.
            continue
        record = {
            key: row.find("td", class_=css_class).text
            for key, css_class in columns.items()
        }
        records[row_index] = record
        dose_totals[record["Dose"]] = dose_totals.get(record["Dose"], 0) + 1
    records["AmountDose"] = dose_totals
    return records
def fetchOnlyNumber(inputStr):
    """Parse a string made only of decimal digits into an integer.

    Parameters
    ----------
    inputStr : str
        Text to parse; no sign, spaces or other characters are accepted.

    Returns
    -------
    int
        The parsed value, or -1 when ``inputStr`` is empty or holds anything
        other than decimal digits.
    """
    # isdecimal() is False for "" and for digit-like characters such as
    # superscripts that int() cannot convert, so both map to -1 instead of
    # being read as 0 or raising ValueError.
    if not inputStr.isdecimal():
        return -1
    return int(inputStr)
def fetchUserInput():
    """Prompt until the user types a whole number, then return it.

    Returns
    -------
    int
        The number entered; callers treat 0 as the "exit" choice.
    """
    while True:
        print("Choose: ", end="")
        usrInput = fetchOnlyNumber(input())
        # fetchOnlyNumber reports unparsable text with a negative value.
        if usrInput >= 0:
            return usrInput
        print("Wrong input! Only numbers are allowed!")
# Interactive driver: list the doses found on the page, ask the user for one,
# then print every hospital row offering that dose.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
whole_data = fetchAllPossibleData(web_page_data)

print(">>> Select the Dose of Vaccination:")
for dose_index, (dose, amount) in enumerate(whole_data["AmountDose"].items(), start=1):
    print("{0} :- Dose {1} [Amount: {2}]".format(dose_index, dose, amount))
print("0 :- Exit")

doseNum = fetchUserInput()
if doseNum == 0:
    # Raise SystemExit directly: the bare exit() helper is installed by the
    # site module and is not guaranteed to be available.
    raise SystemExit(0)
print("<<< Dose Selected: {0}".format(doseNum))

print(">>> Select the Age Group:")
# NOTE(review): the typed number is compared with the dose value itself, not
# with the menu position printed above; confirm the two always coincide.
for key, record in whole_data.items():
    if key == "AmountDose":
        # Summary entry, not a hospital row.
        continue
    if int(record["Dose"]) == doseNum:
        print("Hospital: {0} | Vaccine: {1} | Dose: {2} | Age: {3}".format(
            record["Hospital"],
            record["Vaccine"],
            record["Dose"],
            record["Age"]))
        print('-' * 100)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 1
<<< Dose Selected: 1
>>> Select the Age Group:
Hospital: Apollo Hospital | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Springedge Care | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: West Valley Medical Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Zenlife Clinic | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Family Wellness Center | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Tranquil Valley Hospital Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: SevenHills | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
下面是能得到图片示例中那种输出结果的代码:
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchVaccineDoses(web_page_data):
    """Count how many ``td.dose_num`` cells carry each dose.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.

    Returns
    -------
    dict
        Maps ``"Dose " + cell text`` to the number of cells with that text.
        The entry produced by the table header cell ("Dose Dose") is
        removed.
    """
    vaccine_doses_dict = {}
    for cell in web_page_data.find_all('td', class_="dose_num"):
        label = "Dose " + cell.text
        vaccine_doses_dict[label] = vaccine_doses_dict.get(label, 0) + 1
    # Discard the header entry; pop() with a default keeps the function from
    # raising KeyError when no header cell matched.
    vaccine_doses_dict.pop("Dose Dose", None)
    return vaccine_doses_dict
def fetchAgeGroup(web_page_data, doseNum):
    """Count the age groups listed for one dose number.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page; the first ``<tr>`` is skipped as the table header.
    doseNum : int
        Dose number a row must carry to be counted.

    Returns
    -------
    dict
        Maps the text of each ``td.age`` cell to the number of matching
        rows.
    """
    age_dict = {}
    for row_index, row in enumerate(web_page_data.find_all('tr')):
        if row_index == 0:
            # Table header: carries no data.
            continue
        if fetchOnlyNumber(row.find("td", class_="dose_num").text) != doseNum:
            continue
        current_age = row.find("td", class_="age").text
        age_dict[current_age] = age_dict.get(current_age, 0) + 1
    return age_dict
def fetchOnlyNumber(inputStr):
    """Parse a string made only of decimal digits into an integer.

    Parameters
    ----------
    inputStr : str
        Text to parse; no sign, spaces or other characters are accepted.

    Returns
    -------
    int
        The parsed value, or -1 when ``inputStr`` is empty or holds anything
        other than decimal digits.
    """
    # isdecimal() is False for "" and for digit-like characters such as
    # superscripts that int() cannot convert, so both map to -1 instead of
    # being read as 0 or raising ValueError.
    if not inputStr.isdecimal():
        return -1
    return int(inputStr)
def fetchUserInput():
    """Prompt until the user types a whole number, then return it.

    Returns
    -------
    int
        The number entered; callers treat 0 as the "exit" choice.
    """
    while True:
        print("Choose: ", end="")
        usrInput = fetchOnlyNumber(input())
        # fetchOnlyNumber reports unparsable text with a negative value.
        if usrInput >= 0:
            return usrInput
        print("Wrong input! Only numbers are allowed!")
# Interactive driver: show the dose counts, ask the user for a dose, then
# show how many rows of that dose exist per age group.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
VaccineDoses = fetchVaccineDoses(web_page_data)

print(">>> Select the Dose of Vaccination:")
for indexVacc, (VaccineDose, amount) in enumerate(VaccineDoses.items(), start=1):
    print("{0} :- {1} [Amount: {2}]".format(indexVacc, VaccineDose, amount))
print("0 :- Exit")

doseNum = fetchUserInput()
if doseNum == 0:
    # Raise SystemExit directly: the bare exit() helper is installed by the
    # site module and is not guaranteed to be available.
    raise SystemExit(0)
print("<<< Dose Selected: {0}".format(doseNum))

print(">>> Select the Age Group:")
Ages = fetchAgeGroup(web_page_data, doseNum)
# The age menu is numbered from 0, unlike the dose menu above.
for indexAge, (age, amount) in enumerate(Ages.items()):
    print("{0} :- {1} [Amount: {2}]".format(indexAge, age, amount))
从bs4导入美化组
导入请求
def fetchWebsiteData(url_网站):
网页数据=“”
####
req=请求.获取(url\U网站)
soup=BeautifulSoup(请求文本,'html.parser')
####
返汤
def获取疫苗剂量(网页数据):
剂量计数=0
疫苗剂量
####
剂量=网页数据。查找所有('td',class=“剂量数值”)
以剂量表示的剂量:
如果“剂量”+疫苗剂量目录键()中的剂量文本:
疫苗剂量dict[“剂量”+剂量.text]+=1
其他:
疫苗剂量dict[“剂量”+剂量文本]=1
剂量计数=剂量计数+1
####
####删除表格标题
del疫苗剂量
返回疫苗剂量
def fetchAgeGroup(网页数据,doseNum):
年龄_dict={}
年龄=0
####
行=网页数据。查找所有('tr')
对于行中的行:
如果年龄=0:
##忽略表格标题
年龄\单位计数+=1
持续
如果fetchOnlyNumber(line.find(“td”,class=“dose\u num”).text)=doseNum:
当前年龄=行。查找(“td”,class=“age”)。文本
如果当前年龄在年龄目录键()中:
年龄记录[当前年龄]+=1
其他:
年龄记录[当前年龄]=1
####
返回日期
def FETCHTONLYNUMBER(inputStr):
数字=0
对于inputStr中的sChar:
如果sChar.isdigit():
数字=(数字*10)+整数(沙尔)
其他:
返回-1
返回号码
def fetchUserInput():
###仅限整数
尽管如此:
打印(“选择:”,结束=“”)
usrInput=fetchOnlyNumber(输入())
如果usrInput>0:
返回usrInput
其他:
如果usrInput==0:
###出口
返回0
其他:
打印(“输入错误!只允许数字!”)
url_网站=”https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web\u页面\u数据=获取网站数据(url\u网站)
疫苗剂量=获取疫苗剂量(网页数据)
indexVacc=1
打印(“>>>选择接种剂量:”)
对于接种剂量,以接种剂量表示的数量。项目()
打印(“{0}:-{1}[金额:{2}]”。格式(indexVacc,疫苗剂量,金额))
indexVacc+=1
打印(“0:-退出”)
doseNum=fetchUserInput()
如果doseNum==0:
出口(0)
打印(“>选择年龄组:”)
指数=0
Ages=fetchAgeGroup(网页数据,doseNum)
对于年龄,以年龄为单位的金额。项()
打印(“{0}:-{1}[金额:{2}]”。格式(索引、年龄、金额))
指数+=1
示例02的输出:
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchVaccineDoses(web_page_data):
    """Label every ``td.dose_num`` cell of the page with a running index.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.

    Returns
    -------
    dict
        Maps the 0-based position of each cell to ``"Dose " + cell text``.
        Every matching cell is included, so a header cell carrying the same
        class shows up as an entry too.
    """
    cells = web_page_data.find_all('td', class_="dose_num")
    return {index: "Dose " + cell.text for index, cell in enumerate(cells)}
####Changes here too - Complete your empty function with code -
####I hope it is your output what you want to see.
def fetchAgeGroup(web_page_data, doses):
    """Append the text of each ``td.age`` cell to the matching dose label.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.
    doses : dict
        Labels keyed by 0-based cell position; updated in place.

    Returns
    -------
    dict
        The same ``doses`` object, each label extended with
        ``" | Age " + cell text``.

    Raises
    ------
    KeyError
        If the page has more ``td.age`` cells than ``doses`` has entries.
    """
    for index, cell in enumerate(web_page_data.find_all('td', class_="age")):
        doses[index] += " | Age " + cell.text
    return doses
# Demo driver: download the mock page, label every dose cell, then append
# the age text to each label and print the result.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
VaccineDoses = fetchVaccineDoses(web_page_data)
# fetchAgeGroup receives the dict built above; its result is kept as Ages.
Ages = fetchAgeGroup(web_page_data, VaccineDoses)
# Entry 0 holds the table header text rather than a data row, so it is
# dropped before printing.
del Ages[0]
print(Ages)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 2
<<< Dose Selected: 2
>>> Select the Age Group:
0 :- 45+ [Amount: 29]
1 :- 18+ [Amount: 13]
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchAllPossibleData(web_page_data):
    """Collect every data row of the page plus a per-dose row count.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page; the first ``<tr>`` is skipped as the table header.

    Returns
    -------
    dict
        One entry per data row, keyed by its 1-based row number, each a dict
        with the keys "Hospital", "State", "District", "Vaccine", "Dose" and
        "Age". The extra key "AmountDose" maps each dose text to the number
        of rows carrying it.

    Raises
    ------
    AttributeError
        If a data row lacks one of the expected ``td`` cells.
    """
    # Output key -> CSS class of the <td> that holds the value.
    columns = {
        "Hospital": "hospital_name",
        "State": "state_name",
        "District": "district_name",
        "Vaccine": "vaccine_name",
        "Dose": "dose_num",
        "Age": "age",
    }
    records = {}
    dose_totals = {}
    for row_index, row in enumerate(web_page_data.find_all('tr')):
        if row_index == 0:
            # Table header: carries no data.
            continue
        record = {
            key: row.find("td", class_=css_class).text
            for key, css_class in columns.items()
        }
        records[row_index] = record
        dose_totals[record["Dose"]] = dose_totals.get(record["Dose"], 0) + 1
    records["AmountDose"] = dose_totals
    return records
def fetchOnlyNumber(inputStr):
    """Parse a string made only of decimal digits into an integer.

    Parameters
    ----------
    inputStr : str
        Text to parse; no sign, spaces or other characters are accepted.

    Returns
    -------
    int
        The parsed value, or -1 when ``inputStr`` is empty or holds anything
        other than decimal digits.
    """
    # isdecimal() is False for "" and for digit-like characters such as
    # superscripts that int() cannot convert, so both map to -1 instead of
    # being read as 0 or raising ValueError.
    if not inputStr.isdecimal():
        return -1
    return int(inputStr)
def fetchUserInput():
    """Prompt until the user types a whole number, then return it.

    Returns
    -------
    int
        The number entered; callers treat 0 as the "exit" choice.
    """
    while True:
        print("Choose: ", end="")
        usrInput = fetchOnlyNumber(input())
        # fetchOnlyNumber reports unparsable text with a negative value.
        if usrInput >= 0:
            return usrInput
        print("Wrong input! Only numbers are allowed!")
# Interactive driver: list the doses found on the page, ask the user for one,
# then print every hospital row offering that dose.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
whole_data = fetchAllPossibleData(web_page_data)

print(">>> Select the Dose of Vaccination:")
for dose_index, (dose, amount) in enumerate(whole_data["AmountDose"].items(), start=1):
    print("{0} :- Dose {1} [Amount: {2}]".format(dose_index, dose, amount))
print("0 :- Exit")

doseNum = fetchUserInput()
if doseNum == 0:
    # Raise SystemExit directly: the bare exit() helper is installed by the
    # site module and is not guaranteed to be available.
    raise SystemExit(0)
print("<<< Dose Selected: {0}".format(doseNum))

print(">>> Select the Age Group:")
# NOTE(review): the typed number is compared with the dose value itself, not
# with the menu position printed above; confirm the two always coincide.
for key, record in whole_data.items():
    if key == "AmountDose":
        # Summary entry, not a hospital row.
        continue
    if int(record["Dose"]) == doseNum:
        print("Hospital: {0} | Vaccine: {1} | Dose: {2} | Age: {3}".format(
            record["Hospital"],
            record["Vaccine"],
            record["Dose"],
            record["Age"]))
        print('-' * 100)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 1
<<< Dose Selected: 1
>>> Select the Age Group:
Hospital: Apollo Hospital | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Springedge Care | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: West Valley Medical Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Zenlife Clinic | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Family Wellness Center | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Tranquil Valley Hospital Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: SevenHills | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
>选择接种剂量:
1:-剂量1[数量:33]
2:-剂量2[数量:42]
0:-退出
选择:2
>选择年龄组:
0:-45+[金额:29]
1:-18+[金额:13]
示例03:
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchVaccineDoses(web_page_data):
    """Label every ``td.dose_num`` cell of the page with a running index.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.

    Returns
    -------
    dict
        Maps the 0-based position of each cell to ``"Dose " + cell text``.
        Every matching cell is included, so a header cell carrying the same
        class shows up as an entry too.
    """
    cells = web_page_data.find_all('td', class_="dose_num")
    return {index: "Dose " + cell.text for index, cell in enumerate(cells)}
####Changes here too - Complete your empty function with code -
####I hope it is your output what you want to see.
def fetchAgeGroup(web_page_data, doses):
    """Append the text of each ``td.age`` cell to the matching dose label.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page to search.
    doses : dict
        Labels keyed by 0-based cell position; updated in place.

    Returns
    -------
    dict
        The same ``doses`` object, each label extended with
        ``" | Age " + cell text``.

    Raises
    ------
    KeyError
        If the page has more ``td.age`` cells than ``doses`` has entries.
    """
    for index, cell in enumerate(web_page_data.find_all('td', class_="age")):
        doses[index] += " | Age " + cell.text
    return doses
# Demo driver: download the mock page, label every dose cell, then append
# the age text to each label and print the result.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
VaccineDoses = fetchVaccineDoses(web_page_data)
# fetchAgeGroup receives the dict built above; its result is kept as Ages.
Ages = fetchAgeGroup(web_page_data, VaccineDoses)
# Entry 0 holds the table header text rather than a data row, so it is
# dropped before printing.
del Ages[0]
print(Ages)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 2
<<< Dose Selected: 2
>>> Select the Age Group:
0 :- 45+ [Amount: 29]
1 :- 18+ [Amount: 13]
from bs4 import BeautifulSoup
import requests
def fetchWebsiteData(url_website):
    """Download a page and return it as a parsed BeautifulSoup document.

    Parameters
    ----------
    url_website : str
        URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The whole parsed page, so callers can run their own
        ``find``/``find_all`` queries on it.
    """
    req = requests.get(url_website)
    return BeautifulSoup(req.text, 'html.parser')
def fetchAllPossibleData(web_page_data):
    """Collect every data row of the page plus a per-dose row count.

    Parameters
    ----------
    web_page_data : bs4.BeautifulSoup
        Parsed page; the first ``<tr>`` is skipped as the table header.

    Returns
    -------
    dict
        One entry per data row, keyed by its 1-based row number, each a dict
        with the keys "Hospital", "State", "District", "Vaccine", "Dose" and
        "Age". The extra key "AmountDose" maps each dose text to the number
        of rows carrying it.

    Raises
    ------
    AttributeError
        If a data row lacks one of the expected ``td`` cells.
    """
    # Output key -> CSS class of the <td> that holds the value.
    columns = {
        "Hospital": "hospital_name",
        "State": "state_name",
        "District": "district_name",
        "Vaccine": "vaccine_name",
        "Dose": "dose_num",
        "Age": "age",
    }
    records = {}
    dose_totals = {}
    for row_index, row in enumerate(web_page_data.find_all('tr')):
        if row_index == 0:
            # Table header: carries no data.
            continue
        record = {
            key: row.find("td", class_=css_class).text
            for key, css_class in columns.items()
        }
        records[row_index] = record
        dose_totals[record["Dose"]] = dose_totals.get(record["Dose"], 0) + 1
    records["AmountDose"] = dose_totals
    return records
def fetchOnlyNumber(inputStr):
    """Parse a string made only of decimal digits into an integer.

    Parameters
    ----------
    inputStr : str
        Text to parse; no sign, spaces or other characters are accepted.

    Returns
    -------
    int
        The parsed value, or -1 when ``inputStr`` is empty or holds anything
        other than decimal digits.
    """
    # isdecimal() is False for "" and for digit-like characters such as
    # superscripts that int() cannot convert, so both map to -1 instead of
    # being read as 0 or raising ValueError.
    if not inputStr.isdecimal():
        return -1
    return int(inputStr)
def fetchUserInput():
    """Prompt until the user types a whole number, then return it.

    Returns
    -------
    int
        The number entered; callers treat 0 as the "exit" choice.
    """
    while True:
        print("Choose: ", end="")
        usrInput = fetchOnlyNumber(input())
        # fetchOnlyNumber reports unparsable text with a negative value.
        if usrInput >= 0:
            return usrInput
        print("Wrong input! Only numbers are allowed!")
# Interactive driver: list the doses found on the page, ask the user for one,
# then print every hospital row offering that dose.
url_website = "https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
web_page_data = fetchWebsiteData(url_website)
whole_data = fetchAllPossibleData(web_page_data)

print(">>> Select the Dose of Vaccination:")
for dose_index, (dose, amount) in enumerate(whole_data["AmountDose"].items(), start=1):
    print("{0} :- Dose {1} [Amount: {2}]".format(dose_index, dose, amount))
print("0 :- Exit")

doseNum = fetchUserInput()
if doseNum == 0:
    # Raise SystemExit directly: the bare exit() helper is installed by the
    # site module and is not guaranteed to be available.
    raise SystemExit(0)
print("<<< Dose Selected: {0}".format(doseNum))

print(">>> Select the Age Group:")
# NOTE(review): the typed number is compared with the dose value itself, not
# with the menu position printed above; confirm the two always coincide.
for key, record in whole_data.items():
    if key == "AmountDose":
        # Summary entry, not a hospital row.
        continue
    if int(record["Dose"]) == doseNum:
        print("Hospital: {0} | Vaccine: {1} | Dose: {2} | Age: {3}".format(
            record["Hospital"],
            record["Vaccine"],
            record["Dose"],
            record["Age"]))
        print('-' * 100)
>>> Select the Dose of Vaccination:
1 :- Dose 1 [Amount: 33]
2 :- Dose 2 [Amount: 42]
0 :- Exit
Choose: 1
<<< Dose Selected: 1
>>> Select the Age Group:
Hospital: Apollo Hospital | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Springedge Care | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: West Valley Medical Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: Zenlife Clinic | Vaccine: Covaxin | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Family Wellness Center | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
Hospital: Tranquil Valley Hospital Center | Vaccine: Covaxin | Dose: 1 | Age: 45+
--------------------------------------------------------------------------------
Hospital: SevenHills | Vaccine: Covishield | Dose: 1 | Age: 18+
--------------------------------------------------------------------------------
从bs4导入美化组
导入请求
def fetchWebsiteData(url_网站):
网页数据=“”
####
req=请求.获取(url\U网站)
soup=BeautifulSoup(请求文本,'html.parser')
####
返汤
def FETCHTALLPOSIBLEDATA(网页数据):
数据合并dict={}
剂量={}
数据索引=0
###FOR循环开始
对于web页面数据中的tr。查找所有('tr'):
如果数据_索引==0:
#表头忽略
数据索引+=1
持续
医院名称=tr.find(“td”,class=“医院名称”)。文本
state\u name=tr.find(“td”,class=“state\u name”).text
地区名称=tr.find(“td”,class=“地区名称”)。文本
疫苗名称=tr.find(“td”,class=“疫苗名称”).text
dose\u num=tr.find(“td”,class=“dose\u num”)。文本
年龄=tr.find(“td”,class=“age”)。文本
###dict={Key:Value,Key:Value,…}
数据合并目录[数据索引]={
“医院”:医院名称,
“州”:州名,
“地区”:地区名称,
“疫苗”:疫苗名称,
“剂量”:剂量,
“年龄”:年龄
}
###计算“剂量1”和“剂量2”
如果剂量为_num,则为剂量:
剂量[剂量数量]+=1
其他:
剂量[剂量数量]=1
数据索引+=1
###用于循环结束
数据汇总表[“数量剂量”]=剂量
返回数据合并记录
def FETCHTONLYNUMBER(inputStr):
数字=0
对于inputStr中的sChar:
如果sChar.isdigit():
数字=(数字*10)+整数(沙尔)
其他:
返回-1
返回号码
def fetchUserInput():
###仅限整数
尽管如此:
打印(“选择:”,结束=“”)
usrInput=fetchOnlyNumber(输入())
如果usrInput>0:
返回usrInput
其他:
如果usrInput==0:
###出口
返回0
其他:
打印(“输入错误!只允许数字!”)
url_网站=”https://www.mooc.e-yantra.org/task-spec/fetch-mock-covidpage"
网页数据