Python 3.x 用逗号分隔xpath返回的元素

Python 3.x 用逗号分隔xpath返回的元素,python-3.x,selenium,xpath,selenium-chromedriver,Python 3.x,Selenium,Xpath,Selenium Chromedriver,我有一个简单的函数,返回通过xpath从网站检索到的表的内容: import traceback from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.proxy import Proxy, ProxyType def get_hotbird_13e(): # Downloads an up to da

我有一个简单的函数,返回通过xpath从网站检索到的表的内容:

import traceback
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.proxy import Proxy, ProxyType



def get_hotbird_13e():
    """Print the text of the Hotbird 13E channel/country table.

    Opens the Eutelsat advanced-search page in headless Chrome, reads the
    text of the table located by a fixed absolute XPath and prints it.
    Any exception raised along the way is caught and its traceback is
    printed instead of propagating.  The browser is shut down on every
    path, including failures.
    """
    # Local import: the locator-based API is available on Selenium releases
    # that no longer ship the find_element_by_* helper methods.
    from selenium.webdriver.common.by import By

    # Stays None until Chrome has actually started, so the cleanup below
    # only runs when there is a browser to close.
    driver = None

    try:
        chrome_options = Options()
        chrome_options.add_argument("--headless")

        driver = webdriver.Chrome(chrome_options=chrome_options)
        driver.get("http://www.eutelsat.com/deploy_tvLineUp/struts/advancedSearch.do?orbitalPositionId=13%B0%20EAST&Langue=EN")

        link_xpath = '/html/body/div[1]/div[3]/div/table'
        link_path = driver.find_element(By.XPATH, link_xpath).text

        print(link_path)

    except Exception:
        print(traceback.format_exc())

    finally:
        # Without this, a failed page load or lookup leaves a headless
        # Chrome process running.
        if driver is not None:
            driver.quit()

get_hotbird_13e()
…但是,这将返回xpath中表的所有元素,其中空格用作分隔符。由于某些字段值中有空格,我无法单独访问字段值

我需要在我的代码中修改什么,以便以下示例输出:

TVN TURBO TVN 13° EAST HOTBIRD 13C POLISH HD CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS
…变成:

TVN TURBO, TVN, 13° EAST, HOTBIRD 13C, POLISH, HD, CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS

谢谢

如果希望将表中的每个单元格分开,则必须使用指向每个单元格的xpath。试试这个办法,

# Target every <td> of the table so each cell's text is read on its own.
link_xpath = '/html/body/div[1]/div[3]/div/table//tr/td'
for cell in driver.find_elements_by_xpath(link_xpath):
    print(cell.text)

如果希望将表中的每个单元格分开,则必须使用指向每个单元格的xpath。试试这个办法,

# Target every <td> of the table so each cell's text is read on its own.
link_xpath = '/html/body/div[1]/div[3]/div/table//tr/td'
for cell in driver.find_elements_by_xpath(link_xpath):
    print(cell.text)

按行获取数据,然后获取列表中的所有列值,然后用“,”联接

代码

from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options)
try:
    driver.get("http://www.eutelsat.com/deploy_tvLineUp/struts/advancedSearch.do?orbitalPositionId=13%B0%20EAST&Langue=EN")
    # Wait (15 s timeout) until an element with class "listresult" is in the DOM.
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".listresult")))

    # [1:] skips the first <tr> (presumably the header row -- confirm against the page).
    for row in driver.find_elements(By.XPATH, "//table[@class='listresult']//tr")[1:]:
        # Read each cell's text once; every .text access is a WebDriver call.
        cell_texts = (td.text.strip() for td in row.find_elements(By.XPATH, ".//td"))
        # Filter after stripping so whitespace-only cells do not become empty fields.
        rowwisedata = [text for text in cell_texts if text]
        print(','.join(rowwisedata))
finally:
    # Always close the headless browser, even if the wait or scrape fails.
    driver.quit()
112 UKRAÏNA,Globecast,13° EAST,HOTBIRD 13C,UKRAINIAN,HD,CLEAR
13 ULICA,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,HD,CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13C,ITALIAN,SD,NAGRAVISION / VIDEOGUARD
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13E,ITALIAN,HD,NAGRAVISION / VIDEOGUARD
2M MONDE,Globecast,13° EAST,HOTBIRD 13B,ARABIC,SD,CLEAR,GENERAL
2M MONDE,Globecast,13° EAST,HOTBIRD 13C,ARABIC,SD,CLEAR,GENERAL
4 FUN DANCE,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN GOLD,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN TV,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
6TER,Bis TV,13° EAST,HOTBIRD 13B,FRENCH,SD,VIACCESS

And so on....
输出

from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options)
try:
    driver.get("http://www.eutelsat.com/deploy_tvLineUp/struts/advancedSearch.do?orbitalPositionId=13%B0%20EAST&Langue=EN")
    # Wait (15 s timeout) until an element with class "listresult" is in the DOM.
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".listresult")))

    # [1:] skips the first <tr> (presumably the header row -- confirm against the page).
    for row in driver.find_elements(By.XPATH, "//table[@class='listresult']//tr")[1:]:
        # Read each cell's text once; every .text access is a WebDriver call.
        cell_texts = (td.text.strip() for td in row.find_elements(By.XPATH, ".//td"))
        # Filter after stripping so whitespace-only cells do not become empty fields.
        rowwisedata = [text for text in cell_texts if text]
        print(','.join(rowwisedata))
finally:
    # Always close the headless browser, even if the wait or scrape fails.
    driver.quit()
112 UKRAÏNA,Globecast,13° EAST,HOTBIRD 13C,UKRAINIAN,HD,CLEAR
13 ULICA,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,HD,CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13C,ITALIAN,SD,NAGRAVISION / VIDEOGUARD
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13E,ITALIAN,HD,NAGRAVISION / VIDEOGUARD
2M MONDE,Globecast,13° EAST,HOTBIRD 13B,ARABIC,SD,CLEAR,GENERAL
2M MONDE,Globecast,13° EAST,HOTBIRD 13C,ARABIC,SD,CLEAR,GENERAL
4 FUN DANCE,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN GOLD,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN TV,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
6TER,Bis TV,13° EAST,HOTBIRD 13B,FRENCH,SD,VIACCESS

And so on....

按行获取数据,然后获取列表中的所有列值,然后用“,”联接

代码

from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options)
try:
    driver.get("http://www.eutelsat.com/deploy_tvLineUp/struts/advancedSearch.do?orbitalPositionId=13%B0%20EAST&Langue=EN")
    # Wait (15 s timeout) until an element with class "listresult" is in the DOM.
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".listresult")))

    # [1:] skips the first <tr> (presumably the header row -- confirm against the page).
    for row in driver.find_elements(By.XPATH, "//table[@class='listresult']//tr")[1:]:
        # Read each cell's text once; every .text access is a WebDriver call.
        cell_texts = (td.text.strip() for td in row.find_elements(By.XPATH, ".//td"))
        # Filter after stripping so whitespace-only cells do not become empty fields.
        rowwisedata = [text for text in cell_texts if text]
        print(','.join(rowwisedata))
finally:
    # Always close the headless browser, even if the wait or scrape fails.
    driver.quit()
112 UKRAÏNA,Globecast,13° EAST,HOTBIRD 13C,UKRAINIAN,HD,CLEAR
13 ULICA,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,HD,CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13C,ITALIAN,SD,NAGRAVISION / VIDEOGUARD
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13E,ITALIAN,HD,NAGRAVISION / VIDEOGUARD
2M MONDE,Globecast,13° EAST,HOTBIRD 13B,ARABIC,SD,CLEAR,GENERAL
2M MONDE,Globecast,13° EAST,HOTBIRD 13C,ARABIC,SD,CLEAR,GENERAL
4 FUN DANCE,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN GOLD,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN TV,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
6TER,Bis TV,13° EAST,HOTBIRD 13B,FRENCH,SD,VIACCESS

And so on....
输出

from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options)
try:
    driver.get("http://www.eutelsat.com/deploy_tvLineUp/struts/advancedSearch.do?orbitalPositionId=13%B0%20EAST&Langue=EN")
    # Wait (15 s timeout) until an element with class "listresult" is in the DOM.
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".listresult")))

    # [1:] skips the first <tr> (presumably the header row -- confirm against the page).
    for row in driver.find_elements(By.XPATH, "//table[@class='listresult']//tr")[1:]:
        # Read each cell's text once; every .text access is a WebDriver call.
        cell_texts = (td.text.strip() for td in row.find_elements(By.XPATH, ".//td"))
        # Filter after stripping so whitespace-only cells do not become empty fields.
        rowwisedata = [text for text in cell_texts if text]
        print(','.join(rowwisedata))
finally:
    # Always close the headless browser, even if the wait or scrape fails.
    driver.quit()
112 UKRAÏNA,Globecast,13° EAST,HOTBIRD 13C,UKRAINIAN,HD,CLEAR
13 ULICA,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,HD,CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13C,ITALIAN,SD,NAGRAVISION / VIDEOGUARD
20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13E,ITALIAN,HD,NAGRAVISION / VIDEOGUARD
2M MONDE,Globecast,13° EAST,HOTBIRD 13B,ARABIC,SD,CLEAR,GENERAL
2M MONDE,Globecast,13° EAST,HOTBIRD 13C,ARABIC,SD,CLEAR,GENERAL
4 FUN DANCE,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN GOLD,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
4 FUN TV,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC
6TER,Bis TV,13° EAST,HOTBIRD 13B,FRENCH,SD,VIACCESS

And so on....
另一个解决方案:

# Wait (10 s timeout) for the matched rows of the "listresult" table to be
# visible, then print each row's text.  `driver` must already be on the page.
contents = WebDriverWait(driver, 10).until(
    EC.visibility_of_all_elements_located((By.XPATH, "//table[@class='listresult']//tr[*]")))
for item in contents:
    # print() call form: the Python 2 print statement is a SyntaxError on Python 3.
    print(item.text)
tokenize(replace(replace(substring-after(normalize-space(string-join(//tr//text()[normalize-space()]|//tr[@class]/@class,",")),",")," ?, ?",","),"oneven","even"),",even,")
注意:请将以下导入添加到您的解决方案中

from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
另一个解决方案:

# Wait (10 s timeout) for the matched rows of the "listresult" table to be
# visible, then print each row's text.  `driver` must already be on the page.
contents = WebDriverWait(driver, 10).until(
    EC.visibility_of_all_elements_located((By.XPATH, "//table[@class='listresult']//tr[*]")))
for item in contents:
    # print() call form: the Python 2 print statement is a SyntaxError on Python 3.
    print(item.text)
tokenize(replace(replace(substring-after(normalize-space(string-join(//tr//text()[normalize-space()]|//tr[@class]/@class,",")),",")," ?, ?",","),"oneven","even"),",even,")
注意:请将以下导入添加到您的解决方案中

from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

XPath 2.0一行程序解决方案:

# Wait (10 s timeout) for the matched rows of the "listresult" table to be
# visible, then print each row's text.  `driver` must already be on the page.
contents = WebDriverWait(driver, 10).until(
    EC.visibility_of_all_elements_located((By.XPATH, "//table[@class='listresult']//tr[*]")))
for item in contents:
    # print() call form: the Python 2 print statement is a SyntaxError on Python 3.
    print(item.text)
tokenize(replace(replace(substring-after(normalize-space(string-join(//tr//text()[normalize-space()]|//tr[@class]/@class,",")),",")," ?, ?",","),"oneven","even"),",even,")
输出:

String='112 UKRAÏNA,Globecast,13° EAST,HOTBIRD 13C,UKRAINIAN,HD,CLEAR'

String='13 ULICA,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,HD,CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS'

String='20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13C,ITALIAN,SD,NAGRAVISION / VIDEOGUARD'

String='20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13E,ITALIAN,HD,NAGRAVISION / VIDEOGUARD'

String='2M MONDE,Globecast,13° EAST,HOTBIRD 13B,ARABIC,SD,CLEAR,GENERAL'

String='2M MONDE,Globecast,13° EAST,HOTBIRD 13C,ARABIC,SD,CLEAR,GENERAL'

String='4 FUN DANCE,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC'

String='4 FUN GOLD,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC'

String='4 FUN TV,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC'

...

XPath 2.0一行程序解决方案:

# Wait (10 s timeout) for the matched rows of the "listresult" table to be
# visible, then print each row's text.  `driver` must already be on the page.
contents = WebDriverWait(driver, 10).until(
    EC.visibility_of_all_elements_located((By.XPATH, "//table[@class='listresult']//tr[*]")))
for item in contents:
    # print() call form: the Python 2 print statement is a SyntaxError on Python 3.
    print(item.text)
tokenize(replace(replace(substring-after(normalize-space(string-join(//tr//text()[normalize-space()]|//tr[@class]/@class,",")),",")," ?, ?",","),"oneven","even"),",even,")
输出:

String='112 UKRAÏNA,Globecast,13° EAST,HOTBIRD 13C,UKRAINIAN,HD,CLEAR'

String='13 ULICA,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,HD,CONAX / IRDETO / MEDIAGUARD / NAGRAVISION / VIACCESS'

String='20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13C,ITALIAN,SD,NAGRAVISION / VIDEOGUARD'

String='20 MEDIASET,Mediaset,13° EAST,HOTBIRD 13E,ITALIAN,HD,NAGRAVISION / VIDEOGUARD'

String='2M MONDE,Globecast,13° EAST,HOTBIRD 13B,ARABIC,SD,CLEAR,GENERAL'

String='2M MONDE,Globecast,13° EAST,HOTBIRD 13C,ARABIC,SD,CLEAR,GENERAL'

String='4 FUN DANCE,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC'

String='4 FUN GOLD,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC'

String='4 FUN TV,Cyfrowy Polsat,13° EAST,HOTBIRD 13C,POLISH,SD,CLEAR,MUSIC'

...

很高兴它有助于满足您的要求。@gdogg371:由于页面是静态的,您可以使用pandas数据框(DataFrame)读取表格并将数据导出为csv。如果您需要,请告诉我,我会更新答案;这种情况下您不需要selenium,而且速度也非常快。