Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/selenium/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何将print()设置为docx python_Python_Selenium_Selenium Webdriver - Fatal编程技术网

如何用 Python 将 print() 的输出写入 docx

如何将print()设置为docx python,python,selenium,selenium-webdriver,Python,Selenium,Selenium Webdriver,我有一个代码,可以获取selenium信息,我需要将这些信息打印到docx,但需要使用模板。在这里,我通过print()获取信息(设置某些部分) Stuyvesant高中 一般信息 学校名称: Stuyvesant高中 负责人: 埃里克·孔特雷拉斯先生 负责人的电子邮件: ECONTRE@SCHOOLS.NYC.GOV 类型: 普通学校 等级跨度: 9-12 地址: 纽约州纽约市钱伯斯街345号,邮编10282 我在控制台中打印此信息,但我需要将此信息打印到docx 这里是我打印的代码部分:

我有一段代码,用 selenium 抓取信息;我需要把这些信息按模板写入 docx。目前我是通过 print() 输出这些信息的(下面是部分输出):


Stuyvesant高中


一般信息

学校名称:
Stuyvesant高中
负责人:
埃里克·孔特雷拉斯先生
负责人的电子邮件:
ECONTRE@SCHOOLS.NYC.GOV
类型:
普通学校
等级跨度:
9-12
地址:
纽约州纽约市钱伯斯街345号,邮编10282

我在控制台中打印此信息,但我需要将此信息打印到docx

这里是我打印的代码部分:

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import openpyxl
import docx
from docx.shared import Pt

# Collect the school names stored in cells A1..A100 of the 'RI' sheet.
# Raw strings keep the Windows backslashes literal: sequences such as "\I"
# or "\c" are invalid escapes and make Python emit a warning.
wb = openpyxl.load_workbook(r'D:\INSPR\Rating_100_schools\Top-100.xlsx')
sheet = wb['RI']
List = [
    cellObj.value
    for rowOfCellObjects in sheet['A1':'A100']
    for cellObj in rowOfCellObjects
]

# Values scraped for the school currently being processed, in append order.
School_list_result = []
# The worksheet title is reused later as part of the search queries.
State = sheet.title

driver = webdriver.Chrome(executable_path=r'D:\chromedriver')  # any path

def check_xpath(xpath):
    """Append the text of the element located by *xpath* to ``School_list_result``.

    When no such element exists, the placeholder "No data." is appended
    instead, so both paths add one entry to the list.
    """
    try:
        # find_element(by, value) replaces find_element_by_xpath, which was
        # removed in Selenium 4.3; the string form works on Selenium 3 too.
        element = driver.find_element("xpath", xpath)
    except NoSuchElementException:
        School_list_result.append("No data.")
    else:
        School_list_result.append(element.text)
    
def check_text(partial_link_text):
    """Append the ``href`` of the link whose text contains *partial_link_text*.

    The value is appended to ``School_list_result``; when no matching link
    exists, the placeholder "No data." is appended instead.
    """
    try:
        # find_element(by, value) replaces find_element_by_partial_link_text,
        # which was removed in Selenium 4.3.
        element_text = driver.find_element("partial link text", partial_link_text)
    except NoSuchElementException:
        School_list_result.append("No data.")
    else:
        School_list_result.append(element_text.get_attribute("href"))
            
def check_click(clicker):
    """Click the link whose text contains *clicker*.

    Prints "No click." when no matching link is found; nothing is appended
    to ``School_list_result`` in either case.
    """
    try:
        # find_element(by, value) replaces find_element_by_partial_link_text,
        # which was removed in Selenium 4.3.
        element_click = driver.find_element("partial link text", clicker)
    except NoSuchElementException:
        print("No click.")
    else:
        element_click.click()
            
def get_url(url, _xpath, send_keys):
    """Open *url* and submit a search for the current school.

    The input element located by *_xpath* is cleared, then the school name
    followed by *send_keys* is typed into it and Enter is pressed.

    NOTE: the school name is read from the module-level variable ``schools``
    (the loop variable of the main loop), not from a parameter, so this
    function only works while that loop is running.

    Prints "No data." when the input element is not found.
    """
    driver.get(url)
    try:
        # find_element(by, value) replaces find_element_by_xpath, which was
        # removed in Selenium 4.3.
        _element = driver.find_element("xpath", _xpath)
        _element.clear()
        # implicitly_wait() sets the lookup timeout used by later
        # find_element calls; it does not pause execution here.
        driver.implicitly_wait(10)
        _element.send_keys(schools, send_keys)
        _element.send_keys(u'\ue007')  # WebDriver key code for Enter
        driver.implicitly_wait(10)
    except NoSuchElementException:
        print("No data.")

# Scrape four sites for each selected school and print one report per school.
# Every check_xpath()/check_text() call appends exactly one entry to
# School_list_result, so the call order below fixes the indices used by the
# print() at the end; the index of each entry is noted next to its call.
for schools in List[98:100]:

    #-----------------------------------------GREAT SCHOOLS-------------------------------------------
    get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input'," " + State + " greatschools")
    # Open the first search result. find_element(by, value) replaces
    # find_element_by_xpath, which was removed in Selenium 4.3.
    driver.find_element("xpath", '//*[@id="rso"]/div[1]/div/div[1]/a/h3').click()

    check_xpath('//*[@id="hero"]/div/div[1]/h1') #[0] School Name

    check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[1]/div[2]/span[1]') #[1] Principal

    check_text('Principal email') #[2] Principal's E-mail

    check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[3]/div[2]') #[3] Grade Span

    check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[1]/a/div/span[2]') #[4] Address

    check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[2]/span/a') #[5] Phone

    check_text('Website') #[6] Website

    check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[2]/a') #[7] Associations/Communities

    check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[1]/div/a/div[1]/div') #[8] GreatSchools Rating

    check_xpath('//*[@id="Students"]/div/div[2]/div[1]/div[2]') #[9] Enrollment by Race/Ethnicity

    #-----------------------------------------NCES-------------------------------------------

    driver.implicitly_wait(10)
    get_url("https://nces.ed.gov/search/index.asp?q=&btnG=Search#gsc.tab=0", '//*[@id="qt"]', " " + State)
    check_click('Search for Public Schools - ')
    driver.implicitly_wait(10)

    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[1]/font[2]') #[10] School type

    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[3]/font') #[11] Charter

    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[3]/td/table/tbody/tr[2]/td/table/tbody')
    #[12] Enrollment by Gender

    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[1]/td/table/tbody/tr[2]') #[13] Enrollment by Grade

    #-----------------------------------------USNEWS-------------------------------------------
    driver.implicitly_wait(10)
    url = "https://www.usnews.com/education/best-high-schools/new-york/rankings"
    driver.get(url)
    check_click(schools)
    driver.implicitly_wait(10)

    check_xpath('//*[@id="app"]/div/div/div/div[1]/div/div/div[2]/div[1]/div[2]/p[3]') #[14] U.S.News Rankings

    #-----------------------------------------PUBLIC REVIEW-------------------------------------------
    driver.implicitly_wait(10)
    get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input', " " +  State + " publicschoolreview")
    # find_element(by, value) replaces find_element_by_partial_link_text,
    # which was removed in Selenium 4.3.
    driver.find_element("partial link text", '(2020)').click()
    driver.implicitly_wait(10)

    check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[2]/strong') #[15] Total # Students

    check_xpath('//*[@id="total_teachers_data_row"]/td[2]') #[16] Full-Time Teachers

    check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[3]/strong') #[17] Student/Teacher Ratio

    #-----------------------------------------PRINT INFORMATION-------------------------------------------

    # "\033[1m" / "\033[0m" switch bold on and off in the terminal.
    # The stray trailing "()" argument was dropped: it printed a literal "()".
    print("         ---------------------------------------------------------------"+"\n",
          "                              \033[1m", schools,"\033[0m"+"\n",
          "         ---------------------------------------------------------------"+"\n",
          "                              \033[1mGeneral Information\033[0m        "+"\n",
          "\033[1mSchool Name:\n\033[0m",School_list_result[0]+"\n",
          "\033[1mPrincipal:\n\033[0m",School_list_result[1]+"\n",
          "\033[1mPrincipal’s E-mail:\n\033[0m",School_list_result[2]+"\n",
          "\033[1mType:\n\033[0m",School_list_result[10]+"\n",
          "\033[1mGrade Span:\n\033[0m",School_list_result[3]+"\n",
          "\033[1mAddress:\n\033[0m",School_list_result[4]+"\n",
          "\033[1mPhone:\n\033[0m",School_list_result[5]+"\n",
          "\033[1mWebsite:\n\033[0m",School_list_result[6]+"\n",
          "\033[1mAssociations/Communities:\n\033[0m",School_list_result[7]+"\n",
          "\033[1mGreatSchools Summary Rating:\n\033[0m",School_list_result[8]+"\n",
          "\033[1mU.S.News Rankings:\n\033[0m",School_list_result[14]+"\n",
          "                              \033[1mSchool Details\033[0m"+"\n",
          "\033[1mTotal # Students:\n\033[0m",School_list_result[15]+"\n",
          "\033[1mFull-Time Teachers:\n\033[0m",School_list_result[16]+"\n",
          "\033[1mStudent/Teacher Ratio:\n\033[0m",School_list_result[17]+"\n",
          "\033[1mCharter:\n\033[0m",School_list_result[11]+"\n",
          "\033[1mMagnet: \n\033[0m","No""\n",
          "                              \033[1mEnrollment Data\033[0m"+"\n",
          "\033[1mEnrollment by Race/Ethnicity: \n\033[0m",School_list_result[9]+"\n",
          "\033[1mEnrollment by Gender: \n\033[0m",School_list_result[12]+"\n",
          "\033[1mEnrollment by Grade: \n\033[0m",School_list_result[13]+"\n",
         )


    print()

    # Start the next school with an empty list so the indices line up again.
    School_list_result.clear()
我需要的是:不是按模板把结果打印到控制台,而是按模板写入 docx。
还有一点:如果您知道如何不使用索引(例如 School_list_result[0]),请告诉我。

我假设您和我一样使用windows操作系统,并且知道如何下载python包:

  • 安装 `docx` 和 `python-docx` 两个模块(它们是不同的模块,请确保两个都已安装)

  • 使用以下代码:

  • 如果您有一个由多个 School_list_result 组成的列表,请遍历该列表,下面是一个示例:
  • 让我们简单一点,您需要做的是:

  • 创建一个名为 `List_of_school_list_result` 的列表,把您的数据放进去;其中每个子列表对应一所学校的一条记录
  • 在任何位置,创建一个新的docx文件,双击打开它,写一些东西,按ctrl+s,删除你写的东西,按ctrl+s,关闭文档
  • 转到docx文件所在的目录,按住shift键,右键单击,复制为路径
  • 确保已安装 `docx` 和 `python-docx`,然后运行代码;当程序要求输入路径时,从剪贴板粘贴即可。(请务必使用绝对路径,即以盘符(如 C:)开头的完整路径;相对路径可能不起作用)
  • PS:创建后必须先打开一次 docx 文件的原因是:Microsoft Word 2005+ 的 docx 文件有 3 种状态。首先,刚创建、尚未编辑的文件是二进制格式的。其次,打开它进行编辑时,会在同级目录中生成一个隐藏的 $cache.docx 文件,用于保证性能,并在崩溃时保护数据安全。第三,编辑并保存之后,文件格式才会转换为 XML,此时才可以使用 `python-docx` 模块对其进行编辑

    PS:下面的 Result 类提供了一种清晰的方式来构建 List_of_school_list_result:

    class Result:
        """Group pushed values into fixed-length rows."""

        def __init__(self, length):
            # Number of values that make up one complete row.
            self.l = length
            # Completed rows, and the row currently being filled.
            self.res, self.col = [], []

        def push(self, string):
            """Add one value; store the row in ``res`` once it holds ``l`` values."""
            pending = self.col
            pending.append(string)
            if len(pending) != self.l:
                return
            self.res.append(pending)
            self.col = []

        def publish(self):
            """Return the completed rows; a partially filled row is not included."""
            return self.res
    
    
    # Pass in the number of headers, then call `r.push()` once per value;
    # afterwards assign the published rows to `List_of_school_list_result`.
    r = Result(6)
    for _value in (
        'school name 1',
        'principal name 1',
        'principal email 1',
        'school type 1',
        'grad span 1',
        'address 1',
        'school name 2',
        'principal name 2',
        'principal email 2',
        'school type 2',
        'grad span 2',
        'address 2',
    ):
        r.push(_value)
    List_of_school_list_result = r.publish()
    
    代码的完整版本:

    # Field labels; print_all_into_one_doc pairs them by position with the
    # values of each school record.
    headers = [
        label + ": "
        for label in (
            "School Name",
            "Principal",
            "Principal's Email",
            "Type",
            "Grade Span",
            "Address",
        )
    ]
    
    class Result:
        """Group pushed values into fixed-length rows."""

        def __init__(self, length):
            # Number of values that make up one complete row.
            self.l = length
            # Completed rows, and the row currently being filled.
            self.res, self.col = [], []

        def push(self, string):
            """Add one value; store the row in ``res`` once it holds ``l`` values."""
            pending = self.col
            pending.append(string)
            if len(pending) != self.l:
                return
            self.res.append(pending)
            self.col = []

        def publish(self):
            """Return the completed rows; a partially filled row is not included."""
            return self.res
    
    
    # A row is complete after len(headers) pushes.
    r = Result(len(headers))

    # Call r.push() over and over again, until all the string data is passed
    # in, for example:
    #
    #     r.push('school name 1')
    #     r.push('principal name 1')
    #     r.push('principal email 1')
    #     r.push('school type 1')
    #     r.push('grad span 1')
    #     r.push('address 1')
    #
    #     r.push('school name 2')
    #     r.push('principal name 2')
    #     r.push('principal email 2')
    #     r.push('school type 2')
    #     r.push('grad span 2')
    #     r.push('address 2')

    List_of_school_list_result = r.publish()
    
    
    def print_all_into_one_doc():
        """Append every school record to an existing .docx file chosen by the user.

        Prompts for the path of the document, then for each row of
        ``List_of_school_list_result`` writes a centred title (the first value
        of the row), a "General Information" heading and one label/value
        paragraph pair per entry of ``headers``, followed by a page break.
        The document is saved back to the same path.

        Raises:
            ValueError: if no path is entered at the prompt.
        """
        import os
        from docx import Document
        from docx.shared import RGBColor
        from docx.shared import Pt
        from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

        # The file must already exist as a valid .docx (create it in Word,
        # type something, save, delete it again, save and close); otherwise
        # python-docx reports a PackageNotFoundError.
        p = input('hold shift key right click, copy and paste the file path of docx here: ')
        # "Copy as path" wraps the path in quotes; drop them together with any
        # surrounding blanks. Empty input no longer crashes on p[0].
        p = p.strip().strip('"\'')
        if not p:
            raise ValueError('no docx path was entered')
        p = os.path.abspath(p)
        doc = Document(p)
        # One section per school record.
        for j in List_of_school_list_result:
            h = doc.add_paragraph()
            # Centre the title.
            h.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            title_run = h.add_run(j[0])
            # Title colour: adjust the RGB value here to change it.
            title_run.font.color.rgb = RGBColor(54, 95, 145)
            # Title size.
            title_run.font.size = Pt(36)
            doc.add_paragraph('\n')
            su = doc.add_paragraph()
            ru = su.add_run('General Information')
            ru.font.size = Pt(30)
            # Label in bold, value in italics on the following paragraph.
            for header, value in zip(headers, j):
                sp = doc.add_paragraph()
                rp = sp.add_run(header)
                rp.bold = True
                rp.font.size = Pt(23)
                sm = doc.add_paragraph()
                rm = sm.add_run(value)
                rm.font.size = Pt(22)
                rm.italic = True
            doc.add_page_break()
        doc.save(p)


    print_all_into_one_doc()
    

    评论不适用于扩展讨论或调试会话;这段对话已经结束。请确保答案中包含所有相关信息。评论不用于进一步讨论;这段对话已经结束。
    class Result:
        """Group pushed values into fixed-length rows."""

        def __init__(self, length):
            # Number of values that make up one complete row.
            self.l = length
            # Completed rows, and the row currently being filled.
            self.res, self.col = [], []

        def push(self, string):
            """Add one value; store the row in ``res`` once it holds ``l`` values."""
            pending = self.col
            pending.append(string)
            if len(pending) != self.l:
                return
            self.res.append(pending)
            self.col = []

        def publish(self):
            """Return the completed rows; a partially filled row is not included."""
            return self.res
    
    
    # Pass in the number of headers, then call `r.push()` once per value;
    # afterwards assign the published rows to `List_of_school_list_result`.
    r = Result(6)
    for _value in (
        'school name 1',
        'principal name 1',
        'principal email 1',
        'school type 1',
        'grad span 1',
        'address 1',
        'school name 2',
        'principal name 2',
        'principal email 2',
        'school type 2',
        'grad span 2',
        'address 2',
    ):
        r.push(_value)
    List_of_school_list_result = r.publish()
    
    # Field labels; print_all_into_one_doc pairs them by position with the
    # values of each school record.
    headers = [
        label + ": "
        for label in (
            "School Name",
            "Principal",
            "Principal's Email",
            "Type",
            "Grade Span",
            "Address",
        )
    ]
    
    class Result:
        """Group pushed values into fixed-length rows."""

        def __init__(self, length):
            # Number of values that make up one complete row.
            self.l = length
            # Completed rows, and the row currently being filled.
            self.res, self.col = [], []

        def push(self, string):
            """Add one value; store the row in ``res`` once it holds ``l`` values."""
            pending = self.col
            pending.append(string)
            if len(pending) != self.l:
                return
            self.res.append(pending)
            self.col = []

        def publish(self):
            """Return the completed rows; a partially filled row is not included."""
            return self.res
    
    
    # A row is complete after len(headers) pushes.
    r = Result(len(headers))

    # Call r.push() over and over again, until all the string data is passed
    # in, for example:
    #
    #     r.push('school name 1')
    #     r.push('principal name 1')
    #     r.push('principal email 1')
    #     r.push('school type 1')
    #     r.push('grad span 1')
    #     r.push('address 1')
    #
    #     r.push('school name 2')
    #     r.push('principal name 2')
    #     r.push('principal email 2')
    #     r.push('school type 2')
    #     r.push('grad span 2')
    #     r.push('address 2')

    List_of_school_list_result = r.publish()
    
    
    def print_all_into_one_doc():
        """Append every school record to an existing .docx file chosen by the user.

        Prompts for the path of the document, then for each row of
        ``List_of_school_list_result`` writes a centred title (the first value
        of the row), a "General Information" heading and one label/value
        paragraph pair per entry of ``headers``, followed by a page break.
        The document is saved back to the same path.

        Raises:
            ValueError: if no path is entered at the prompt.
        """
        import os
        from docx import Document
        from docx.shared import RGBColor
        from docx.shared import Pt
        from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

        # The file must already exist as a valid .docx (create it in Word,
        # type something, save, delete it again, save and close); otherwise
        # python-docx reports a PackageNotFoundError.
        p = input('hold shift key right click, copy and paste the file path of docx here: ')
        # "Copy as path" wraps the path in quotes; drop them together with any
        # surrounding blanks. Empty input no longer crashes on p[0].
        p = p.strip().strip('"\'')
        if not p:
            raise ValueError('no docx path was entered')
        p = os.path.abspath(p)
        doc = Document(p)
        # One section per school record.
        for j in List_of_school_list_result:
            h = doc.add_paragraph()
            # Centre the title.
            h.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            title_run = h.add_run(j[0])
            # Title colour: adjust the RGB value here to change it.
            title_run.font.color.rgb = RGBColor(54, 95, 145)
            # Title size.
            title_run.font.size = Pt(36)
            doc.add_paragraph('\n')
            su = doc.add_paragraph()
            ru = su.add_run('General Information')
            ru.font.size = Pt(30)
            # Label in bold, value in italics on the following paragraph.
            for header, value in zip(headers, j):
                sp = doc.add_paragraph()
                rp = sp.add_run(header)
                rp.bold = True
                rp.font.size = Pt(23)
                sm = doc.add_paragraph()
                rm = sm.add_run(value)
                rm.font.size = Pt(22)
                rm.italic = True
            doc.add_page_break()
        doc.save(p)


    print_all_into_one_doc()