Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/python-3.x/17.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 如何在组合对象之前处理 IndexError 异常_Python_Python 3.x_Web Scraping - Fatal编程技术网

Python 如何在组合对象之前处理 IndexError 异常

Python 如何在组合对象之前处理索引器异常,python,python-3.x,web-scraping,Python,Python 3.x,Web Scraping,我希望实现的目标: import ast import sys # Create empty lists [Global] jobs = [] names = [] dates = [] summaries = [] locations = [] # Function - Ingest parsed HTML data | Filter out required values def getJobs(parsedHTML): # Loop - Get job title f

我希望实现的目标:

import ast
import sys

# Create empty lists [Global]
# One accumulator per scraped field. Entry i of every list is meant to
# describe the same posting, so the lists have to stay the same length.
jobs = []
names = []
dates = []
summaries = []
locations = []

# Function - Ingest parsed HTML data | Filter out required values
def getJobs(parsedHTML):
    """Collect job fields from a parsed results page into the global lists.

    For every field the page is searched for its container elements, and
    each container for the element holding the value. The stripped text is
    appended as a one-key dict to that field's list. A container without the
    value element, or with empty text, contributes a ``"null"`` placeholder
    so a single missing value does not shorten that field's list.

    Args:
        parsedHTML: Parsed document offering ``find_all(name=..., attrs=...)``
            whose results offer ``find_all`` and ``getText()`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        None. Results are appended to ``jobs``, ``names``, ``dates``,
        ``summaries`` and ``locations``; the lists are not cleared first.
    """
    # (target list, dict key, container tag, container attrs, value tag, value attrs)
    fieldSpecs = (
        (jobs, "job-title", 'h2', {'class': 'title'}, 'a', {'data-tn-element': 'jobTitle'}),
        (names, "company-name", 'div', {'class': 'sjcl'}, 'span', {'class': 'company'}),
        (dates, "date-created", 'div', {'class': 'result-link-bar'}, 'span', {'class': 'date date-a11y'}),
        (summaries, "short-description", 'div', {'class': 'result'}, 'div', {'class': 'summary'}),
        (locations, "location", 'div', {'class': 'sjcl'}, 'span', {'class': 'location'}),
    )

    for target, key, outerTag, outerAttrs, innerTag, innerAttrs in fieldSpecs:
        for container in parsedHTML.find_all(name=outerTag, attrs=outerAttrs):
            matches = container.find_all(name=innerTag, attrs=innerAttrs)
            if not matches:
                # Value element absent: still add an entry to THIS field's
                # list so it stays index-aligned with the other lists.
                target.append({key: "null"})
                continue
            for node in matches:
                # str() never yields None, so an empty string is the only
                # "missing value" signal that can be observed here.
                val = str(node.getText().strip())
                target.append({key: val or "null"})

    
# Function - Generate test data
def testData(parsedHTML, typeProc):
    """Export the global result lists to text files, or re-import them.

    Each list has its own file (``jobs.txt``, ``names.txt``, ``dates.txt``,
    ``summaries.txt``, ``locations.txt``) holding one dict literal per line.

    Args:
        parsedHTML: Unused; kept so existing callers keep working.
        typeProc: Truthy -> overwrite the files with the current lists.
            Equal to ``False`` -> read the files and append every parsed
            dict to the matching list (the lists are not cleared first).
            Anything else (e.g. ``None``) prints an error and exits with
            status 1.

    Raises:
        OSError: If a file cannot be opened.
        ValueError, SyntaxError: If an imported line is not a valid literal.
    """
    # Pair every cache file with the list it mirrors so both directions
    # share one loop instead of five copy-pasted blocks.
    targets = (
        ("jobs.txt", jobs),
        ("names.txt", names),
        ("dates.txt", dates),
        ("summaries.txt", summaries),
        ("locations.txt", locations),
    )

    # typeProc == True | Export data to text files
    if typeProc:
        for filename, items in targets:
            # Explicit UTF-8 so scraped non-ASCII text round-trips on every
            # platform; `with` closes the file, no manual close() needed.
            with open(filename, "w", encoding="utf-8") as handle:
                handle.writelines(str(item) + "\n" for item in items)

    # typeProc == False | Import data from txt files, convert to dictionary and append to list
    # `== False` (not `not typeProc`) is deliberate: None must reach the else branch.
    elif typeProc == False:
        for filename, items in targets:
            with open(filename, "r", encoding="utf-8") as handle:
                for line in handle:
                    # literal_eval only accepts Python literals, so the
                    # cached text cannot execute arbitrary code.
                    items.append(ast.literal_eval(line.replace("\n", "")))

    # Else | Neither truthy nor equal to False
    else:
        print("Function: testData | Error: if statement else output")
        sys.exit(1)

# Function - Remove items from all lists
def wipeLists():
    """Empty the five global result lists in place (the list objects are kept)."""
    for bucket in (jobs, names, dates, summaries, locations):
        bucket.clear()

# Function - JSON Blob Generator
def genJSON(parsedHTML):
    """Scrape *parsedHTML* and return one merged dict per job posting.

    Calls ``getJobs`` to fill the global lists, then merges entry ``i`` of
    ``jobs``, ``names``, ``dates``, ``summaries`` and ``locations`` into a
    single dict, for every index of ``jobs``.

    Raises:
        IndexError: If any of the other lists is shorter than ``jobs``.
    """
    # Testing with cached local IRL data
    #testData(parsedHTML, False)

    getJobs(parsedHTML)

    # Merge order matters only on key clashes: later lists win.
    fieldLists = (jobs, names, dates, summaries, locations)
    jsonBlob = []
    for idx in range(len(jobs)):
        merged = {}
        for fieldList in fieldLists:
            merged.update(fieldList[idx])
        jsonBlob.append(merged)

    return jsonBlob
# Loop - Get job location
# Reads the location from the `data-rc-loc` attribute of every
# `div.recJobLoc` found inside a `div.sjcl` container. A container without
# such a div, or a div without the attribute, yields a "null" placeholder so
# `locations` keeps one entry per container.
for div in parsedHTML.find_all(name='div', attrs={'class':'sjcl'}):
    locDivs = div.find_all(name='div', attrs={'class':'recJobLoc'})
    if not locDivs:
        locations.append({"location": "null"})
    for divChild in locDivs:
        dictItem = {"location": f"{divChild.get('data-rc-loc', 'null')}"}
        locations.append(dictItem)
下面添加的代码过滤经过解析的HTML页面以查找特定值。然后以字典的形式将每个特定值添加到其自己的特定列表中。一旦所有的值都添加到列表中,其中的字典就会合并成一个JSON blob,然后我就可以导出了

注意:这是一个快速 PoC(概念验证)的一部分,所以代码写得比较仓促、粗糙,请见谅。

我的问题:

import ast
import sys

# Create empty lists [Global]
# One accumulator per scraped field. Entry i of every list is meant to
# describe the same posting, so the lists have to stay the same length.
jobs = []
names = []
dates = []
summaries = []
locations = []

# Function - Ingest parsed HTML data | Filter out required values
def getJobs(parsedHTML):
    """Collect job fields from a parsed results page into the global lists.

    For every field the page is searched for its container elements, and
    each container for the element holding the value. The stripped text is
    appended as a one-key dict to that field's list. A container without the
    value element, or with empty text, contributes a ``"null"`` placeholder
    so a single missing value does not shorten that field's list.

    Args:
        parsedHTML: Parsed document offering ``find_all(name=..., attrs=...)``
            whose results offer ``find_all`` and ``getText()`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        None. Results are appended to ``jobs``, ``names``, ``dates``,
        ``summaries`` and ``locations``; the lists are not cleared first.
    """
    # (target list, dict key, container tag, container attrs, value tag, value attrs)
    fieldSpecs = (
        (jobs, "job-title", 'h2', {'class': 'title'}, 'a', {'data-tn-element': 'jobTitle'}),
        (names, "company-name", 'div', {'class': 'sjcl'}, 'span', {'class': 'company'}),
        (dates, "date-created", 'div', {'class': 'result-link-bar'}, 'span', {'class': 'date date-a11y'}),
        (summaries, "short-description", 'div', {'class': 'result'}, 'div', {'class': 'summary'}),
        (locations, "location", 'div', {'class': 'sjcl'}, 'span', {'class': 'location'}),
    )

    for target, key, outerTag, outerAttrs, innerTag, innerAttrs in fieldSpecs:
        for container in parsedHTML.find_all(name=outerTag, attrs=outerAttrs):
            matches = container.find_all(name=innerTag, attrs=innerAttrs)
            if not matches:
                # Value element absent: still add an entry to THIS field's
                # list so it stays index-aligned with the other lists.
                target.append({key: "null"})
                continue
            for node in matches:
                # str() never yields None, so an empty string is the only
                # "missing value" signal that can be observed here.
                val = str(node.getText().strip())
                target.append({key: val or "null"})

    
# Function - Generate test data
def testData(parsedHTML, typeProc):
    """Export the global result lists to text files, or re-import them.

    Each list has its own file (``jobs.txt``, ``names.txt``, ``dates.txt``,
    ``summaries.txt``, ``locations.txt``) holding one dict literal per line.

    Args:
        parsedHTML: Unused; kept so existing callers keep working.
        typeProc: Truthy -> overwrite the files with the current lists.
            Equal to ``False`` -> read the files and append every parsed
            dict to the matching list (the lists are not cleared first).
            Anything else (e.g. ``None``) prints an error and exits with
            status 1.

    Raises:
        OSError: If a file cannot be opened.
        ValueError, SyntaxError: If an imported line is not a valid literal.
    """
    # Pair every cache file with the list it mirrors so both directions
    # share one loop instead of five copy-pasted blocks.
    targets = (
        ("jobs.txt", jobs),
        ("names.txt", names),
        ("dates.txt", dates),
        ("summaries.txt", summaries),
        ("locations.txt", locations),
    )

    # typeProc == True | Export data to text files
    if typeProc:
        for filename, items in targets:
            # Explicit UTF-8 so scraped non-ASCII text round-trips on every
            # platform; `with` closes the file, no manual close() needed.
            with open(filename, "w", encoding="utf-8") as handle:
                handle.writelines(str(item) + "\n" for item in items)

    # typeProc == False | Import data from txt files, convert to dictionary and append to list
    # `== False` (not `not typeProc`) is deliberate: None must reach the else branch.
    elif typeProc == False:
        for filename, items in targets:
            with open(filename, "r", encoding="utf-8") as handle:
                for line in handle:
                    # literal_eval only accepts Python literals, so the
                    # cached text cannot execute arbitrary code.
                    items.append(ast.literal_eval(line.replace("\n", "")))

    # Else | Neither truthy nor equal to False
    else:
        print("Function: testData | Error: if statement else output")
        sys.exit(1)

# Function - Remove items from all lists
def wipeLists():
    """Empty the five global result lists in place (the list objects are kept)."""
    for bucket in (jobs, names, dates, summaries, locations):
        bucket.clear()

# Function - JSON Blob Generator
def genJSON(parsedHTML):
    """Scrape *parsedHTML* and return one merged dict per job posting.

    Calls ``getJobs`` to fill the global lists, then merges entry ``i`` of
    ``jobs``, ``names``, ``dates``, ``summaries`` and ``locations`` into a
    single dict, for every index of ``jobs``.

    Raises:
        IndexError: If any of the other lists is shorter than ``jobs``.
    """
    # Testing with cached local IRL data
    #testData(parsedHTML, False)

    getJobs(parsedHTML)

    # Merge order matters only on key clashes: later lists win.
    fieldLists = (jobs, names, dates, summaries, locations)
    jsonBlob = []
    for idx in range(len(jobs)):
        merged = {}
        for fieldList in fieldLists:
            merged.update(fieldList[idx])
        jsonBlob.append(merged)

    return jsonBlob
# Loop - Get job location
# Reads the location from the `data-rc-loc` attribute of every
# `div.recJobLoc` found inside a `div.sjcl` container. A container without
# such a div, or a div without the attribute, yields a "null" placeholder so
# `locations` keeps one entry per container.
for div in parsedHTML.find_all(name='div', attrs={'class':'sjcl'}):
    locDivs = div.find_all(name='div', attrs={'class':'recJobLoc'})
    if not locDivs:
        locations.append({"location": "null"})
    for divChild in locDivs:
        dictItem = {"location": f"{divChild.get('data-rc-loc', 'null')}"}
        locations.append(dictItem)
将以下列表和字典组合在一起时,我在导出blob时不会遇到任何问题:

  • 工作
  • 名字
  • 日期
  • 摘要
但是,当把位置(locations)列表也合并到 blob 中时,会遇到 IndexError(索引越界)异常。如下图所示:

我的分析:

import ast
import sys

# Create empty lists [Global]
# One accumulator per scraped field. Entry i of every list is meant to
# describe the same posting, so the lists have to stay the same length.
jobs = []
names = []
dates = []
summaries = []
locations = []

# Function - Ingest parsed HTML data | Filter out required values
def getJobs(parsedHTML):
    """Collect job fields from a parsed results page into the global lists.

    For every field the page is searched for its container elements, and
    each container for the element holding the value. The stripped text is
    appended as a one-key dict to that field's list. A container without the
    value element, or with empty text, contributes a ``"null"`` placeholder
    so a single missing value does not shorten that field's list.

    Args:
        parsedHTML: Parsed document offering ``find_all(name=..., attrs=...)``
            whose results offer ``find_all`` and ``getText()`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        None. Results are appended to ``jobs``, ``names``, ``dates``,
        ``summaries`` and ``locations``; the lists are not cleared first.
    """
    # (target list, dict key, container tag, container attrs, value tag, value attrs)
    fieldSpecs = (
        (jobs, "job-title", 'h2', {'class': 'title'}, 'a', {'data-tn-element': 'jobTitle'}),
        (names, "company-name", 'div', {'class': 'sjcl'}, 'span', {'class': 'company'}),
        (dates, "date-created", 'div', {'class': 'result-link-bar'}, 'span', {'class': 'date date-a11y'}),
        (summaries, "short-description", 'div', {'class': 'result'}, 'div', {'class': 'summary'}),
        (locations, "location", 'div', {'class': 'sjcl'}, 'span', {'class': 'location'}),
    )

    for target, key, outerTag, outerAttrs, innerTag, innerAttrs in fieldSpecs:
        for container in parsedHTML.find_all(name=outerTag, attrs=outerAttrs):
            matches = container.find_all(name=innerTag, attrs=innerAttrs)
            if not matches:
                # Value element absent: still add an entry to THIS field's
                # list so it stays index-aligned with the other lists.
                target.append({key: "null"})
                continue
            for node in matches:
                # str() never yields None, so an empty string is the only
                # "missing value" signal that can be observed here.
                val = str(node.getText().strip())
                target.append({key: val or "null"})

    
# Function - Generate test data
def testData(parsedHTML, typeProc):
    """Export the global result lists to text files, or re-import them.

    Each list has its own file (``jobs.txt``, ``names.txt``, ``dates.txt``,
    ``summaries.txt``, ``locations.txt``) holding one dict literal per line.

    Args:
        parsedHTML: Unused; kept so existing callers keep working.
        typeProc: Truthy -> overwrite the files with the current lists.
            Equal to ``False`` -> read the files and append every parsed
            dict to the matching list (the lists are not cleared first).
            Anything else (e.g. ``None``) prints an error and exits with
            status 1.

    Raises:
        OSError: If a file cannot be opened.
        ValueError, SyntaxError: If an imported line is not a valid literal.
    """
    # Pair every cache file with the list it mirrors so both directions
    # share one loop instead of five copy-pasted blocks.
    targets = (
        ("jobs.txt", jobs),
        ("names.txt", names),
        ("dates.txt", dates),
        ("summaries.txt", summaries),
        ("locations.txt", locations),
    )

    # typeProc == True | Export data to text files
    if typeProc:
        for filename, items in targets:
            # Explicit UTF-8 so scraped non-ASCII text round-trips on every
            # platform; `with` closes the file, no manual close() needed.
            with open(filename, "w", encoding="utf-8") as handle:
                handle.writelines(str(item) + "\n" for item in items)

    # typeProc == False | Import data from txt files, convert to dictionary and append to list
    # `== False` (not `not typeProc`) is deliberate: None must reach the else branch.
    elif typeProc == False:
        for filename, items in targets:
            with open(filename, "r", encoding="utf-8") as handle:
                for line in handle:
                    # literal_eval only accepts Python literals, so the
                    # cached text cannot execute arbitrary code.
                    items.append(ast.literal_eval(line.replace("\n", "")))

    # Else | Neither truthy nor equal to False
    else:
        print("Function: testData | Error: if statement else output")
        sys.exit(1)

# Function - Remove items from all lists
def wipeLists():
    """Empty the five global result lists in place (the list objects are kept)."""
    for bucket in (jobs, names, dates, summaries, locations):
        bucket.clear()

# Function - JSON Blob Generator
def genJSON(parsedHTML):
    """Scrape *parsedHTML* and return one merged dict per job posting.

    Calls ``getJobs`` to fill the global lists, then merges entry ``i`` of
    ``jobs``, ``names``, ``dates``, ``summaries`` and ``locations`` into a
    single dict, for every index of ``jobs``.

    Raises:
        IndexError: If any of the other lists is shorter than ``jobs``.
    """
    # Testing with cached local IRL data
    #testData(parsedHTML, False)

    getJobs(parsedHTML)

    # Merge order matters only on key clashes: later lists win.
    fieldLists = (jobs, names, dates, summaries, locations)
    jsonBlob = []
    for idx in range(len(jobs)):
        merged = {}
        for fieldList in fieldLists:
            merged.update(fieldList[idx])
        jsonBlob.append(merged)

    return jsonBlob
# Loop - Get job location
# Reads the location from the `data-rc-loc` attribute of every
# `div.recJobLoc` found inside a `div.sjcl` container. A container without
# such a div, or a div without the attribute, yields a "null" placeholder so
# `locations` keeps one entry per container.
for div in parsedHTML.find_all(name='div', attrs={'class':'sjcl'}):
    locDivs = div.find_all(name='div', attrs={'class':'recJobLoc'})
    if not locDivs:
        locations.append({"location": "null"})
    for divChild in locDivs:
        dictItem = {"location": f"{divChild.get('data-rc-loc', 'null')}"}
        locations.append(dictItem)
我发现有时找不到该值:它没有包含在解析后的 HTML 中,而原因不受我控制,例如用户在创建职位时没有填写它。本例中的问题是位置(locations)列表的长度为 14,而其他列表的长度为 15,这导致在使用 for 循环合并列表时出现 IndexError 异常。

我的问题:

import ast
import sys

# Create empty lists [Global]
# One accumulator per scraped field. Entry i of every list is meant to
# describe the same posting, so the lists have to stay the same length.
jobs = []
names = []
dates = []
summaries = []
locations = []

# Function - Ingest parsed HTML data | Filter out required values
def getJobs(parsedHTML):
    """Collect job fields from a parsed results page into the global lists.

    For every field the page is searched for its container elements, and
    each container for the element holding the value. The stripped text is
    appended as a one-key dict to that field's list. A container without the
    value element, or with empty text, contributes a ``"null"`` placeholder
    so a single missing value does not shorten that field's list.

    Args:
        parsedHTML: Parsed document offering ``find_all(name=..., attrs=...)``
            whose results offer ``find_all`` and ``getText()`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        None. Results are appended to ``jobs``, ``names``, ``dates``,
        ``summaries`` and ``locations``; the lists are not cleared first.
    """
    # (target list, dict key, container tag, container attrs, value tag, value attrs)
    fieldSpecs = (
        (jobs, "job-title", 'h2', {'class': 'title'}, 'a', {'data-tn-element': 'jobTitle'}),
        (names, "company-name", 'div', {'class': 'sjcl'}, 'span', {'class': 'company'}),
        (dates, "date-created", 'div', {'class': 'result-link-bar'}, 'span', {'class': 'date date-a11y'}),
        (summaries, "short-description", 'div', {'class': 'result'}, 'div', {'class': 'summary'}),
        (locations, "location", 'div', {'class': 'sjcl'}, 'span', {'class': 'location'}),
    )

    for target, key, outerTag, outerAttrs, innerTag, innerAttrs in fieldSpecs:
        for container in parsedHTML.find_all(name=outerTag, attrs=outerAttrs):
            matches = container.find_all(name=innerTag, attrs=innerAttrs)
            if not matches:
                # Value element absent: still add an entry to THIS field's
                # list so it stays index-aligned with the other lists.
                target.append({key: "null"})
                continue
            for node in matches:
                # str() never yields None, so an empty string is the only
                # "missing value" signal that can be observed here.
                val = str(node.getText().strip())
                target.append({key: val or "null"})

    
# Function - Generate test data
def testData(parsedHTML, typeProc):
    """Export the global result lists to text files, or re-import them.

    Each list has its own file (``jobs.txt``, ``names.txt``, ``dates.txt``,
    ``summaries.txt``, ``locations.txt``) holding one dict literal per line.

    Args:
        parsedHTML: Unused; kept so existing callers keep working.
        typeProc: Truthy -> overwrite the files with the current lists.
            Equal to ``False`` -> read the files and append every parsed
            dict to the matching list (the lists are not cleared first).
            Anything else (e.g. ``None``) prints an error and exits with
            status 1.

    Raises:
        OSError: If a file cannot be opened.
        ValueError, SyntaxError: If an imported line is not a valid literal.
    """
    # Pair every cache file with the list it mirrors so both directions
    # share one loop instead of five copy-pasted blocks.
    targets = (
        ("jobs.txt", jobs),
        ("names.txt", names),
        ("dates.txt", dates),
        ("summaries.txt", summaries),
        ("locations.txt", locations),
    )

    # typeProc == True | Export data to text files
    if typeProc:
        for filename, items in targets:
            # Explicit UTF-8 so scraped non-ASCII text round-trips on every
            # platform; `with` closes the file, no manual close() needed.
            with open(filename, "w", encoding="utf-8") as handle:
                handle.writelines(str(item) + "\n" for item in items)

    # typeProc == False | Import data from txt files, convert to dictionary and append to list
    # `== False` (not `not typeProc`) is deliberate: None must reach the else branch.
    elif typeProc == False:
        for filename, items in targets:
            with open(filename, "r", encoding="utf-8") as handle:
                for line in handle:
                    # literal_eval only accepts Python literals, so the
                    # cached text cannot execute arbitrary code.
                    items.append(ast.literal_eval(line.replace("\n", "")))

    # Else | Neither truthy nor equal to False
    else:
        print("Function: testData | Error: if statement else output")
        sys.exit(1)

# Function - Remove items from all lists
def wipeLists():
    """Empty the five global result lists in place (the list objects are kept)."""
    for bucket in (jobs, names, dates, summaries, locations):
        bucket.clear()

# Function - JSON Blob Generator
def genJSON(parsedHTML):
    """Scrape *parsedHTML* and return one merged dict per job posting.

    Calls ``getJobs`` to fill the global lists, then merges entry ``i`` of
    ``jobs``, ``names``, ``dates``, ``summaries`` and ``locations`` into a
    single dict, for every index of ``jobs``.

    Raises:
        IndexError: If any of the other lists is shorter than ``jobs``.
    """
    # Testing with cached local IRL data
    #testData(parsedHTML, False)

    getJobs(parsedHTML)

    # Merge order matters only on key clashes: later lists win.
    fieldLists = (jobs, names, dates, summaries, locations)
    jsonBlob = []
    for idx in range(len(jobs)):
        merged = {}
        for fieldList in fieldLists:
            merged.update(fieldList[idx])
        jsonBlob.append(merged)

    return jsonBlob
# Loop - Get job location
# Reads the location from the `data-rc-loc` attribute of every
# `div.recJobLoc` found inside a `div.sjcl` container. A container without
# such a div, or a div without the attribute, yields a "null" placeholder so
# `locations` keeps one entry per container.
for div in parsedHTML.find_all(name='div', attrs={'class':'sjcl'}):
    locDivs = div.find_all(name='div', attrs={'class':'recJobLoc'})
    if not locDivs:
        locations.append({"location": "null"})
    for divChild in locDivs:
        dictItem = {"location": f"{divChild.get('data-rc-loc', 'null')}"}
        locations.append(dictItem)
如下面的代码所示,我试图在未找到抓取值时分配一个占位符值“null”来处理这个问题,但由于某种原因,该占位符没有生效,仍然会遇到 IndexError 异常。任何帮助都将不胜感激,提前谢谢你

我的代码:

import ast
import sys

# Create empty lists [Global]
# One accumulator per scraped field. Entry i of every list is meant to
# describe the same posting, so the lists have to stay the same length.
jobs = []
names = []
dates = []
summaries = []
locations = []

# Function - Ingest parsed HTML data | Filter out required values
def getJobs(parsedHTML):
    """Collect job fields from a parsed results page into the global lists.

    For every field the page is searched for its container elements, and
    each container for the element holding the value. The stripped text is
    appended as a one-key dict to that field's list. A container without the
    value element, or with empty text, contributes a ``"null"`` placeholder
    so a single missing value does not shorten that field's list.

    Args:
        parsedHTML: Parsed document offering ``find_all(name=..., attrs=...)``
            whose results offer ``find_all`` and ``getText()`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        None. Results are appended to ``jobs``, ``names``, ``dates``,
        ``summaries`` and ``locations``; the lists are not cleared first.
    """
    # (target list, dict key, container tag, container attrs, value tag, value attrs)
    fieldSpecs = (
        (jobs, "job-title", 'h2', {'class': 'title'}, 'a', {'data-tn-element': 'jobTitle'}),
        (names, "company-name", 'div', {'class': 'sjcl'}, 'span', {'class': 'company'}),
        (dates, "date-created", 'div', {'class': 'result-link-bar'}, 'span', {'class': 'date date-a11y'}),
        (summaries, "short-description", 'div', {'class': 'result'}, 'div', {'class': 'summary'}),
        (locations, "location", 'div', {'class': 'sjcl'}, 'span', {'class': 'location'}),
    )

    for target, key, outerTag, outerAttrs, innerTag, innerAttrs in fieldSpecs:
        for container in parsedHTML.find_all(name=outerTag, attrs=outerAttrs):
            matches = container.find_all(name=innerTag, attrs=innerAttrs)
            if not matches:
                # Value element absent: still add an entry to THIS field's
                # list so it stays index-aligned with the other lists.
                target.append({key: "null"})
                continue
            for node in matches:
                # str() never yields None, so an empty string is the only
                # "missing value" signal that can be observed here.
                val = str(node.getText().strip())
                target.append({key: val or "null"})

    
# Function - Generate test data
def testData(parsedHTML, typeProc):
    """Export the global result lists to text files, or re-import them.

    Each list has its own file (``jobs.txt``, ``names.txt``, ``dates.txt``,
    ``summaries.txt``, ``locations.txt``) holding one dict literal per line.

    Args:
        parsedHTML: Unused; kept so existing callers keep working.
        typeProc: Truthy -> overwrite the files with the current lists.
            Equal to ``False`` -> read the files and append every parsed
            dict to the matching list (the lists are not cleared first).
            Anything else (e.g. ``None``) prints an error and exits with
            status 1.

    Raises:
        OSError: If a file cannot be opened.
        ValueError, SyntaxError: If an imported line is not a valid literal.
    """
    # Pair every cache file with the list it mirrors so both directions
    # share one loop instead of five copy-pasted blocks.
    targets = (
        ("jobs.txt", jobs),
        ("names.txt", names),
        ("dates.txt", dates),
        ("summaries.txt", summaries),
        ("locations.txt", locations),
    )

    # typeProc == True | Export data to text files
    if typeProc:
        for filename, items in targets:
            # Explicit UTF-8 so scraped non-ASCII text round-trips on every
            # platform; `with` closes the file, no manual close() needed.
            with open(filename, "w", encoding="utf-8") as handle:
                handle.writelines(str(item) + "\n" for item in items)

    # typeProc == False | Import data from txt files, convert to dictionary and append to list
    # `== False` (not `not typeProc`) is deliberate: None must reach the else branch.
    elif typeProc == False:
        for filename, items in targets:
            with open(filename, "r", encoding="utf-8") as handle:
                for line in handle:
                    # literal_eval only accepts Python literals, so the
                    # cached text cannot execute arbitrary code.
                    items.append(ast.literal_eval(line.replace("\n", "")))

    # Else | Neither truthy nor equal to False
    else:
        print("Function: testData | Error: if statement else output")
        sys.exit(1)

# Function - Remove items from all lists
def wipeLists():
    """Empty the five global result lists in place (the list objects are kept)."""
    for bucket in (jobs, names, dates, summaries, locations):
        bucket.clear()

# Function - JSON Blob Generator
def genJSON(parsedHTML):
    """Scrape *parsedHTML* and return one merged dict per job posting.

    Calls ``getJobs`` to fill the global lists, then merges entry ``i`` of
    ``jobs``, ``names``, ``dates``, ``summaries`` and ``locations`` into a
    single dict, for every index of ``jobs``.

    Raises:
        IndexError: If any of the other lists is shorter than ``jobs``.
    """
    # Testing with cached local IRL data
    #testData(parsedHTML, False)

    getJobs(parsedHTML)

    # Merge order matters only on key clashes: later lists win.
    fieldLists = (jobs, names, dates, summaries, locations)
    jsonBlob = []
    for idx in range(len(jobs)):
        merged = {}
        for fieldList in fieldLists:
            merged.update(fieldList[idx])
        jsonBlob.append(merged)

    return jsonBlob
# Loop - Get job location
# Variant of the location loop that reads the custom `data-rc-loc` attribute
# of each `div.recJobLoc` nested under a `div.sjcl`, instead of the text of a
# span. Each value is wrapped in a single-key {"location": ...} dict and
# appended to the global `locations` list.
# NOTE(review): `parsedHTML` is not defined at this level, so this fragment is
# presumably meant to replace the location loop inside getJobs - confirm.
for div in parsedHTML.find_all(name='div', attrs={'class':'sjcl'}):
    for divChild in div.find_all(name='div', attrs={'class':'recJobLoc'}):
        # Subscript access: presumably fails if the attribute is absent - verify.
        dictItem = {"location": f"{divChild['data-rc-loc']}"}
        locations.append(dictItem)

感谢 @pavel 就如何处理这一问题所作的说明。我发现我要查找的值在创建时实际上是一个必填字段,但出于某种原因,我在筛选解析后的数据时没有取到数量正确的值。

我再次查看了页面的源代码,发现另有一个字段恰好包含我要查找的值。因此,现在不再获取父div中span元素的文本,而是获取父div内子div元素的自定义data-*属性值(data-rc-loc)。我在测试时没有遇到任何错误。

更新代码:

import ast
import sys

# Create empty lists [Global]
# One list per scraped field. getJobs appends a single-key dict to the
# matching list for every value it finds, genJSON merges the lists entry by
# entry (same index = same job), and wipeLists empties them again.
jobs = []
names = []
dates = []
summaries = []
locations = []

# Function - Ingest parsed HTML data | Filter out required values
def getJobs(parsedHTML):
    """Extract the job fields from ``parsedHTML`` into the global lists.

    For each field, every matching outer element is searched for the matching
    inner element(s); the stripped text of each inner element is stored as a
    single-key dict in that field's list (``jobs``, ``names``, ``dates``,
    ``summaries``, ``locations``). Text that is empty after stripping is
    stored as the string "null".

    Parameters:
        parsedHTML: Parsed page object offering ``find_all(name=..., attrs=...)``
            whose results offer ``find_all`` and ``getText()``.

    Returns:
        None. Results are appended to the module-level lists.
    """
    # (outer tag, outer attrs, inner tag, inner attrs, output key, target list)
    # Order matters only for readability; each field fills its own list.
    fieldSpecs = (
        # Job title
        ('h2', {'class': 'title'},
         'a', {'data-tn-element': 'jobTitle'},
         "job-title", jobs),
        # Job poster's name
        ('div', {'class': 'sjcl'},
         'span', {'class': 'company'},
         "company-name", names),
        # Date the job post was created
        ('div', {'class': 'result-link-bar'},
         'span', {'class': 'date date-a11y'},
         "date-created", dates),
        # Short job description
        ('div', {'class': 'result'},
         'div', {'class': 'summary'},
         "short-description", summaries),
        # Job location
        ('div', {'class': 'sjcl'},
         'span', {'class': 'location'},
         "location", locations),
    )

    for outerName, outerAttrs, innerName, innerAttrs, key, target in fieldSpecs:
        for outer in parsedHTML.find_all(name=outerName, attrs=outerAttrs):
            for inner in outer.find_all(name=innerName, attrs=innerAttrs):
                val = str(inner.getText().strip())
                # str() never returns None, so the former `val is None` test
                # could not fire; an empty string is the real "no value" case.
                # The placeholder goes into this field's own list (the old
                # dead branch always targeted `locations`).
                target.append({key: val if val else "null"})

    
# Function - Generate test data
def testData(parsedHTML, typeProc):
    """Export the global result lists to text files, or reload them from disk.

    Each list has its own file in the current working directory
    (jobs.txt, names.txt, dates.txt, summaries.txt, locations.txt) holding
    one ``str(dict)`` per line.

    Parameters:
        parsedHTML: Not used by the active code; kept so the signature stays
            the same for existing callers.
        typeProc: Truthy -> write the current list contents to the files.
            Equal to ``False`` -> read the files, parse each line back into a
            dict with ``ast.literal_eval`` and append it to the matching list.
            Anything else (for example ``None``) -> print an error and exit.

    Raises:
        SystemExit: With status 1 when ``typeProc`` matches neither mode.
    """
    # File name / global list pairs, handled in this order in both modes.
    caches = (
        ("jobs.txt", jobs),
        ("names.txt", names),
        ("dates.txt", dates),
        ("summaries.txt", summaries),
        ("locations.txt", locations),
    )

    # typeProc == True | Export data to text files
    if typeProc:
        # The lists are written as they currently are; nothing is scraped here.
        for fileName, records in caches:
            # `with` closes the file on exit, so no explicit close() is needed.
            with open(fileName, "w") as file:
                for record in records:
                    file.write(str(record))
                    file.write("\n")

    # typeProc == False | Import data from txt files, convert to dictionary and append to list
    # Deliberately `== False` rather than `not typeProc`: values such as None
    # must fall through to the error branch below.
    elif typeProc == False:
        for fileName, records in caches:
            with open(fileName, "r") as file:
                for line in file:
                    records.append(ast.literal_eval(line.replace("\n", "")))

    # Else | typeProc was neither truthy nor equal to False
    else:
        print("Function: testData | Error: if statement else output")
        sys.exit(1)

# Function - Remove items from all lists
def wipeLists():
    """Empty every module-level result list in place.

    The list objects themselves are kept (only their contents are dropped),
    so any other reference to them sees the emptied list as well.
    """
    for bucket in (jobs, names, dates, summaries, locations):
        bucket.clear()

# Function - JSON Blob Generator
def genJSON(parsedHTML):
    """Scrape ``parsedHTML`` and merge the per-field lists into one dict per job.

    Calls ``getJobs(parsedHTML)`` to fill the module-level lists, then
    combines the i-th entry of ``jobs``, ``names``, ``dates``, ``summaries``
    and ``locations`` into a single dictionary.

    Parameters:
        parsedHTML: Parsed page object, passed straight through to getJobs.

    Returns:
        list: One merged dictionary per entry in ``jobs``.

    Raises:
        IndexError: If any field list holds fewer entries than ``jobs``.
            The message names the short lists and their sizes, and is raised
            before anything is merged.
    """
    # Testing with cached local IRL data
    #testData(parsedHTML, False)

    getJobs(parsedHTML)

    # The merge below pairs entries purely by position, so a list that is
    # shorter than `jobs` cannot be lined up. Report which one(s) instead of
    # failing with an anonymous "list index out of range".
    expected = len(jobs)
    fieldLists = {
        "names": names,
        "dates": dates,
        "summaries": summaries,
        "locations": locations,
    }
    tooShort = {
        label: len(entries)
        for label, entries in fieldLists.items()
        if len(entries) < expected
    }
    if tooShort:
        raise IndexError(
            f"genJSON: found {expected} job titles but fewer entries in {tooShort}; "
            "the field lists are out of step and cannot be merged by position"
        )

    # Merge dictionaries | Combining dictionaries into single object
    # Every list is at least as long as `jobs` here, so zip stops after
    # len(jobs) records; surplus entries in the other lists are ignored,
    # exactly as the index-based loop did.
    return [
        {**job, **name, **date, **summary, **location}
        for job, name, date, summary, location in zip(
            jobs, names, dates, summaries, locations
        )
    ]
# Loop - Get job location
# Variant of the location loop that reads the custom `data-rc-loc` attribute
# of each `div.recJobLoc` nested under a `div.sjcl`, instead of the text of a
# span. Each value is wrapped in a single-key {"location": ...} dict and
# appended to the global `locations` list.
# NOTE(review): `parsedHTML` is not defined at this level, so this fragment is
# presumably meant to replace the location loop inside getJobs - confirm.
for div in parsedHTML.find_all(name='div', attrs={'class':'sjcl'}):
    for divChild in div.find_all(name='div', attrs={'class':'recJobLoc'}):
        # Subscript access: presumably fails if the attribute is absent - verify.
        dictItem = {"location": f"{divChild['data-rc-loc']}"}
        locations.append(dictItem)

感谢每一位试图帮助我们的人。这已经解决。

如果我是你,我关心的不是如何处理这个错误,而是如何解决根本问题。所以有时候位置会缺失?是哪条记录缺失了?是第一条记录中缺失了吗?那样的话,所有记录的位置都会不正确。事实上,IndexError正在帮助您避免造成混乱。如果您试图解析的记录中没有“位置”项,“null”占位符的做法就会失效。@pavel-问题已解决,谢谢您的评论。我还添加了我自己的答案。