Python：webdriver Firefox和PhantomJS之间的差异问题_Python_Xpath_Selenium Webdriver

Python：webdriver Firefox和PhantomJS之间的差异问题

python xpath selenium-webdriver

Python：webdriver Firefox和PhantomJS之间的差异问题,python,xpath,selenium-webdriver,Python,Xpath,Selenium Webdriver,在过去的一两天里，我一直在编写这个Python脚本，当我使用Firefox webdriver时，一切都很好，但是当我切换到使用PhantomJS这样的无头浏览器时，它会在使用setNumber=parseSetNumber（setName[0]）时失败出现错误错误：由于setName为空，列表索引超出范围它前面的行setName=atag.xpath（“./div[contains（@class，'product_info'）]]/div[contains（@class，'product_n

在过去的一两天里，我一直在编写这个Python脚本，当我使用Firefox webdriver时，一切都很好，但是当我切换到使用PhantomJS这样的无头浏览器时，它会在使用

setNumber=parseSetNumber（setName[0]）时失败

出现错误

错误：由于setName
为空，列表索引超出范围
它前面的行setName=atag.xpath（“./div[contains（@class，'product_info'）]]/div[contains（@class，'product_name'）]]/a/text（）”
在仅使用PhantomJS webdriver时不返回任何内容，如果使用Firefox webdriver，则返回一个值
只有当我将webdriver从Firefox切换到PhantomJS时，错误才会发生。我使用PhantomJS，因为脚本是在linux服务器上运行的
import time
import os.path
import lxml.html as LH
import re
import sys
from selenium import webdriver
from random import randint
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

PARAMS = sys.argv
URL = PARAMS[1]
BASEURL = URL[:URL.rfind('/')+1]

# Parses the set name for the set number
def parseSetNumber(string):
    string = string.split(' ')
    stringLength = len(string)
    string = string[(stringLength - 1)]
    if string.replace('.','').isdigit():
        return string
    else:
        return ""

# Returns set reference for this site
def parseRefId(string):
    string = string.split('_')
    return str(string[2])

try:
    PAGE_NUMBER = 1

    #--------------------------------------------------
    ## Get initial page

    driver = webdriver.PhantomJS()
    driver.get(PARAMS[1])

    #--------------------------------------------------
    ## Get page count

    # Give page time to load
    time.sleep(2)

    PAGE_RAW = driver.page_source
    PAGE_RAW = LH.fromstring(PAGE_RAW)
    PAGE_COUNT_RAW = PAGE_RAW.xpath("//div[contains(@class, 'pageControlMenu')]/div/ul/li")
    PAGE_COUNT = len(PAGE_COUNT_RAW) - 2

    #--------------------------------------------------
    ## Get page if its not page one

    while PAGE_NUMBER <= PAGE_COUNT:
        #--------------------------------------------------
        ## Create empty file
        FILE_NAME = PARAMS[3] + 'json/' + time.strftime("%Y%m%d%H") + '_' + str(PARAMS[2]) + '_' + str(PAGE_NUMBER) + '.json'
        #--------------------------------------------------
        ## Create JSON file if it doesnt exist
        if os.path.exists(FILE_NAME)==False:
            JSON_FILE = open(FILE_NAME, "a+", encoding="utf-8")
        else:
            JSON_FILE = open(FILE_NAME, "w", encoding="utf-8")
        JSON_FILE.write("{")
        #--------------------------------------------------
        # Click page for next page if not page 1
        if PAGE_NUMBER > 1:
            index = 0
            for atag in PAGE_COUNT_RAW:
                if index == PAGE_NUMBER:
                    elements = driver.find_elements_by_xpath("//div[contains(@class, 'pageControlMenu')]/div/ul/li")
                    if elements:
                        element = elements[index].find_elements_by_xpath("./a")
                        if element:
                            element[0].click()
                            time.sleep(randint(3,5))
                index += 1
        #--------------------------------------------------
        ## Remove survey box if it pops up and log
        try:
            surveyBox = driver.find_element_by_link_text("No, thanks")
            if surveyBox:
                surveyBox.click()
                print("Store[" + str(PARAMS[2]) + "]: Survey box found on page - " + str(PAGE_NUMBER))
        except:
            print("Store[" + str(PARAMS[2]) + "]: No survey box on page - " + str(PAGE_NUMBER))
        #--------------------------------------------------
        ## Proces page
        # If page is greater then 1 then get the page source of the new page.
        if PAGE_NUMBER > 1:
            PAGE_RAW = driver.page_source
            PAGE_RAW = LH.fromstring(PAGE_RAW)
        PAGE_RAW = PAGE_RAW.xpath("//div[contains(@class, 'estore_product_container')]")
        index = 0
        size = len(PAGE_RAW)
        for atag in PAGE_RAW:
            if PAGE_NUMBER > 1 and index == 0:
                WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH, "./div[contains(@class, 'product_info')]/div[contains(@class, 'product_name')]/a")))
            setStore = PARAMS[2]
            setName = atag.xpath("./div[contains(@class, 'product_info')]/div[contains(@class, 'product_name')]/a/text()")
            setNumber = parseSetNumber(setName[0])
            setPrice = atag.xpath("./div[contains(@class, 'product_info')]/div[contains(@class, 'product_price')]/text()")
            setLink = atag.xpath("./div[contains(@class, 'product_info')]/div[contains(@class, 'product_name')]/a/@href")
            setRef = atag.xpath("./div[contains(@class, 'product_info')]/div[contains(@class, 'product_price')]/@id")
            if setRef:
                setRef = parseRefId(setRef[0])
            if re.search('[0-9\.]+', setPrice[0]) is not None:
                JSON_FILE.write("\"" + str(index) + "\":{\"store\":\"" + str(setStore) + "\",\"name\":\"" + str(setName[0]) + "\",\"number\":\"" + str(setNumber) + "\",\"price\":\"" + re.search('[0-9\.]+', setPrice[0]).group() + "\",\"ref\":\"" + str(setRef) + "\",\"link\":\"" + str(setLink[0]) + "\"}")
                if index+1 < size:
                    JSON_FILE.write(",")
            index += 1
        #--------------------------------------------------
        ## Close JSON file
        JSON_FILE.write("}")
        JSON_FILE.close()
        #--------------------------------------------------
        ## Increment page number
        PAGE_NUMBER += 1
        #--------------------------------------------------

    #--------------------------------------------------
    ## Close webdriver
    driver.quit()
    #--------------------------------------------------

except Exception as e:
    print('Error: ' + str(e.args[0]))

# Remove gecodriver.log file
GHOSTDRIVER_FILE = str(PARAMS[3]) + 'jobs/ghostdriver.log'
if os.path.exists(GHOSTDRIVER_FILE)==True:
    os.remove(GHOSTDRIVER_FILE)

好的，看起来我已经解决了这个问题，在使用PhantomJS时，我不得不为webdriver添加set\u windows\u size
选项
原来：
driver = webdriver.PhantomJS()
driver.get(PARAMS[1])

解决方案：
driver = webdriver.PhantomJS()
driver.set_window_size(1024, 768)
driver.get(PARAMS[1])

现在PhantomJS Web驱动程序的工作方式与Firefox Web驱动程序的工作方式相同
driver = webdriver.PhantomJS()
driver.set_window_size(1024, 768)
driver.get(PARAMS[1])