如何将print()设置为docx python
我有一个代码,可以获取selenium信息,我需要将这些信息打印到docx,但需要使用模板。在这里,我通过print()获取信息(设置某些部分)如何将print()设置为docx python,python,selenium,selenium-webdriver,Python,Selenium,Selenium Webdriver,我有一个代码,可以获取selenium信息,我需要将这些信息打印到docx,但需要使用模板。在这里,我通过print()获取信息(设置某些部分) Stuyvesant高中 一般信息 学校名称: Stuyvesant高中 负责人: 埃里克·孔特雷拉斯先生 负责人的电子邮件: ECONTRE@SCHOOLS.NYC.GOV 类型: 普通学校 等级跨度: 9-12 地址: 纽约州纽约市钱伯斯街345号,邮编10282 我在控制台中打印此信息,但我需要将此信息打印到docx 这里是我打印的代码部分:
Stuyvesant高中
一般信息 学校名称:
Stuyvesant高中
负责人:
埃里克·孔特雷拉斯先生
负责人的电子邮件:
ECONTRE@SCHOOLS.NYC.GOV
类型:
普通学校
等级跨度:
9-12
地址:
纽约州纽约市钱伯斯街345号,邮编10282 我在控制台中打印此信息,但我需要将此信息打印到docx 这里是我打印的代码部分:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import openpyxl
import docx
from docx.shared import Pt
List = []
wb = openpyxl.load_workbook('D:\INSPR\Rating_100_schools\Top-100.xlsx')
sheet = wb['RI']
tuple(sheet['A1':'A100']) # Get all cells from A1 to A100.
for rowOfCellObjects in sheet['A1':'A100']:
for cellObj in rowOfCellObjects:
List.append(cellObj.value)
School_list_result = []
State = sheet.title
driver = webdriver.Chrome(executable_path='D:\chromedriver') #any path
def check_xpath(xpath):
try:
element = driver.find_element_by_xpath(xpath)
School_list_result.append(element.text)
except NoSuchElementException:
School_list_result.append("No data.")
def check_text(partial_link_text):
try:
element_text = driver.find_element_by_partial_link_text(partial_link_text)
School_list_result.append(element_text.get_attribute("href"))
except NoSuchElementException:
School_list_result.append("No data.")
def check_click(clicker):
try:
element_click = driver.find_element_by_partial_link_text(clicker)
element_click.click()
except NoSuchElementException:
print("No click.")
def get_url(url, _xpath, send_keys):
driver.get(url)
try:
_element = driver.find_element_by_xpath(_xpath)
_element.clear()
driver.implicitly_wait(10)
_element.send_keys(schools, send_keys)
_element.send_keys(u'\ue007')
driver.implicitly_wait(10)
except NoSuchElementException:
print("No data.")
for schools in List[98:100]:
#-----------------------------------------GREAT SCHOOLS-------------------------------------------
get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input'," " + State + " greatschools")
_clicker = driver.find_element_by_xpath('//*[@id="rso"]/div[1]/div/div[1]/a/h3').click()
check_xpath('//*[@id="hero"]/div/div[1]/h1') #School Name
check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[1]/div[2]/span[1]') #Principal
check_text('Principal email') #Principal’s E-mail
check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[3]/div[2]') #Grade Span
check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[1]/a/div/span[2]') #Address
check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[2]/span/a') #Phone
check_text('Website') #Website
check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[2]/a') #Associations/Communities
check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[1]/div/a/div[1]/div') #GreatSchools Rating
check_xpath('//*[@id="Students"]/div/div[2]/div[1]/div[2]') #Enrollment by Race/Ethnicity
#-----------------------------------------NCES-------------------------------------------
driver.implicitly_wait(10)
get_url("https://nces.ed.gov/search/index.asp?q=&btnG=Search#gsc.tab=0", '//*[@id="qt"]', " " + State)
check_click('Search for Public Schools - ')
driver.implicitly_wait(10)
check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[1]/font[2]') #School type
check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[3]/font') #Charter
check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[3]/td/table/tbody/tr[2]/td/table/tbody')
#Enrollment by Gender
check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[1]/td/table/tbody/tr[2]') #Enrollment by Grade
#-----------------------------------------USNEWS-------------------------------------------
driver.implicitly_wait(10)
url = "https://www.usnews.com/education/best-high-schools/new-york/rankings"
driver.get(url)
check_click(schools)
driver.implicitly_wait(10)
check_xpath('//*[@id="app"]/div/div/div/div[1]/div/div/div[2]/div[1]/div[2]/p[3]') #U.S.News Rankings
#-----------------------------------------PUBLIC REVIEW-------------------------------------------
driver.implicitly_wait(10)
get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input', " " + State + " publicschoolreview")
clicker = driver.find_element_by_partial_link_text('(2020)').click()
driver.implicitly_wait(10)
check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[2]/strong') #Total # Students
check_xpath('//*[@id="total_teachers_data_row"]/td[2]') #Full-Time Teachers
check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[3]/strong') #Student/Teacher Ratio
#-----------------------------------------PRINT INFOFMATION-------------------------------------------
print(" ---------------------------------------------------------------"+"\n",
" \033[1m", schools,"\033[0m"+"\n",
" ---------------------------------------------------------------"+"\n",
" \033[1mGeneral Information\033[0m "+"\n",
"\033[1mSchool Name:\n\033[0m",School_list_result[0]+"\n",
"\033[1mPrincipal:\n\033[0m",School_list_result[1]+"\n",
"\033[1mPrincipal’s E-mail:\n\033[0m",School_list_result[2]+"\n",
"\033[1mType:\n\033[0m",School_list_result[10]+"\n",
"\033[1mGrade Span:\n\033[0m",School_list_result[3]+"\n",
"\033[1mAddress:\n\033[0m",School_list_result[4]+"\n",
"\033[1mPhone:\n\033[0m",School_list_result[5]+"\n",
"\033[1mWebsite:\n\033[0m",School_list_result[6]+"\n",
"\033[1mAssociations/Communities:\n\033[0m",School_list_result[7]+"\n",
"\033[1mGreatSchools Summary Rating:\n\033[0m",School_list_result[8]+"\n",
"\033[1mU.S.News Rankings:\n\033[0m",School_list_result[14]+"\n",
" \033[1mSchool Details\033[0m"+"\n",
"\033[1mTotal # Students:\n\033[0m",School_list_result[15]+"\n",
"\033[1mFull-Time Teachers:\n\033[0m",School_list_result[16]+"\n",
"\033[1mStudent/Teacher Ratio:\n\033[0m",School_list_result[17]+"\n",
"\033[1mCharter:\n\033[0m",School_list_result[11]+"\n",
"\033[1mMagnet: \n\033[0m","No""\n",
" \033[1mEnrollment Data\033[0m"+"\n",
"\033[1mEnrollment by Race/Ethnicity: \n\033[0m",School_list_result[9]+"\n",
"\033[1mEnrollment by Gender: \n\033[0m",School_list_result[12]+"\n",
"\033[1mEnrollment by Grade: \n\033[0m",School_list_result[13]+"\n",
()
)
print()
School_list_result.clear()
我需要的是:不是按模板将结果打印到控制台中,而是按模板打印到docx中。
还有一点:如果您知道如何不使用索引(如:学校列表\u结果[0]),请告诉我。我假设您和我一样使用windows操作系统,并且知道如何下载python包:
docx
和python docx
模块(它们是不同的,请确保您同时安装了这两个模块)
学校列表\u结果
,请重复该列表,下面是一个示例:list\u of_school\u list\u result
的列表,将您的数据转储到其中,每个列表都应该是某个学校的一条记录docx
和python docx
,运行代码,当要求您输入路径时,将其从剪贴板中粘贴。(请确保使用绝对路径,即根为c的完整目录,相对路径可能不起作用)python docx
module对其进行编辑
PS:下面的结果类提供了一种创建学校列表结果的清晰方法:
class Result:
def __init__(self, length):
self.l = length
self.res = []
self.col = []
def push(self, string):
self.col.append(string)
if(len(self.col) == self.l):
self.res.append(self.col)
self.col = []
def publish(self):
return self.res
r = Result(6) # pass in the length of the headers, then all you need, is to call `r.push()` over and over again. after that, assign it to `List_of_school_list_result`
r.push('school name 1')
r.push('principal name 1')
r.push('principal email 1')
r.push('school type 1')
r.push('grad span 1')
r.push('address 1')
r.push('school name 2')
r.push('principal name 2')
r.push('principal email 2')
r.push('school type 2')
r.push('grad span 2')
r.push('address 2')
List_of_school_list_result = r.publish()
代码的完整版本:
headers = [
"School Name: ",
"Principal: ",
"Principal's Email: ",
"Type: ",
"Grade Span: ",
"Address: ",
]
class Result:
def __init__(self, length):
self.l = length
self.res = []
self.col = []
def push(self, string):
self.col.append(string)
if(len(self.col) == self.l):
self.res.append(self.col)
self.col = []
def publish(self):
return self.res
r = Result(len(headers))
# call r.push() over and over again, until all the string data is passed in.
''' for example
r.push('school name 1')
r.push('principal name 1')
r.push('principal email 1')
r.push('school type 1')
r.push('grad span 1')
r.push('address 1')
r.push('school name 2')
r.push('principal name 2')
r.push('principal email 2')
r.push('school type 2')
r.push('grad span 2')
r.push('address 2')
'''
List_of_school_list_result = r.publish()
def print_all_into_one_doc():
import os
from docx import Document
from docx.shared import RGBColor
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
# after you create a new docx file, double click to open it, write some stuff, press ctrl + s, delete what you have written, press ctrl + s, close the document
# Otherwise python-docx reports a Package Note Find Error.
p = input('hold shift key right click, copy and paste the file path of docx here: ')
if p[0] == '"' or p[0] == "'":
# validate path
p = p[1:-1]
p = os.path.abspath(p)
doc = Document(p)
# iterate List of all school
for j in List_of_school_list_result:
h = doc.add_paragraph()
# make title align to center
h.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
r = h.add_run(j[0])
# set title color: you can adjust any color of title here
r.font.color.rgb = RGBColor(54, 95, 145)
# set title size
r.font.size = Pt(36)
doc.add_paragraph('\n')
su = doc.add_paragraph()
ru = su.add_run('General Information')
ru.font.size = Pt(30)
for i, d in enumerate(headers):
sp = doc.add_paragraph()
rp = sp.add_run(headers[i])
rp.bold = True
rp.font.size = Pt(23)
sm = doc.add_paragraph()
rm = sm.add_run(j[i])
rm.font.size = Pt(22)
rm.italic = True
doc.add_page_break()
doc.save(p)
print_all_into_one_doc()
评论不适用于扩展讨论或调试会话;这段对话已经结束。请确保答案中包含所有相关信息。评论不用于进一步讨论;这段对话已经结束。
class Result:
def __init__(self, length):
self.l = length
self.res = []
self.col = []
def push(self, string):
self.col.append(string)
if(len(self.col) == self.l):
self.res.append(self.col)
self.col = []
def publish(self):
return self.res
r = Result(6) # pass in the length of the headers, then all you need, is to call `r.push()` over and over again. after that, assign it to `List_of_school_list_result`
r.push('school name 1')
r.push('principal name 1')
r.push('principal email 1')
r.push('school type 1')
r.push('grad span 1')
r.push('address 1')
r.push('school name 2')
r.push('principal name 2')
r.push('principal email 2')
r.push('school type 2')
r.push('grad span 2')
r.push('address 2')
List_of_school_list_result = r.publish()
headers = [
"School Name: ",
"Principal: ",
"Principal's Email: ",
"Type: ",
"Grade Span: ",
"Address: ",
]
class Result:
def __init__(self, length):
self.l = length
self.res = []
self.col = []
def push(self, string):
self.col.append(string)
if(len(self.col) == self.l):
self.res.append(self.col)
self.col = []
def publish(self):
return self.res
r = Result(len(headers))
# call r.push() over and over again, until all the string data is passed in.
''' for example
r.push('school name 1')
r.push('principal name 1')
r.push('principal email 1')
r.push('school type 1')
r.push('grad span 1')
r.push('address 1')
r.push('school name 2')
r.push('principal name 2')
r.push('principal email 2')
r.push('school type 2')
r.push('grad span 2')
r.push('address 2')
'''
List_of_school_list_result = r.publish()
def print_all_into_one_doc():
import os
from docx import Document
from docx.shared import RGBColor
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
# after you create a new docx file, double click to open it, write some stuff, press ctrl + s, delete what you have written, press ctrl + s, close the document
# Otherwise python-docx reports a Package Note Find Error.
p = input('hold shift key right click, copy and paste the file path of docx here: ')
if p[0] == '"' or p[0] == "'":
# validate path
p = p[1:-1]
p = os.path.abspath(p)
doc = Document(p)
# iterate List of all school
for j in List_of_school_list_result:
h = doc.add_paragraph()
# make title align to center
h.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
r = h.add_run(j[0])
# set title color: you can adjust any color of title here
r.font.color.rgb = RGBColor(54, 95, 145)
# set title size
r.font.size = Pt(36)
doc.add_paragraph('\n')
su = doc.add_paragraph()
ru = su.add_run('General Information')
ru.font.size = Pt(30)
for i, d in enumerate(headers):
sp = doc.add_paragraph()
rp = sp.add_run(headers[i])
rp.bold = True
rp.font.size = Pt(23)
sm = doc.add_paragraph()
rm = sm.add_run(j[i])
rm.font.size = Pt(22)
rm.italic = True
doc.add_page_break()
doc.save(p)
print_all_into_one_doc()