In the Selenium scraper below (full code posted further down), I build a dict of scraped fields and want to check whether each value is a list before assigning it, but this condition never evaluates to true:

if type(contentAggregator[i]) == isinstance(contentAggregator[i], list):

Why not? (python, if-statement, types)

type returns the type object of whatever is passed to it. isinstance returns a boolean describing whether the first argument is an instance of the second. A type object and a boolean can never compare equal, so the condition above is always false. Long story short, drop the type expression and evaluate only the isinstance expression:

if isinstance(contentAggregator[i], list):
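
A quick way to see this (a minimal standalone demonstration, separate from the scraper code):

value = ["a", "b"]
print(type(value))                             # <class 'list'>
print(isinstance(value, list))                 # True
print(type(value) == isinstance(value, list))  # False: a type object never equals a bool
print(type(value) == list)                     # True, but isinstance is preferred
                                               # since it also accepts subclasses of list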

Thanks, I've got it now. Here is the full code:
# This Python file uses the following encoding: utf-8

import pymysql
from sqlalchemy import create_engine
pymysql.install_as_MySQLdb()
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver as wd
from Stamprally import StamprallyInfo
import re
import pandas as pd
import numpy as np
import urllib.request
import math
import time
import sys
import os
import MySQLdb

programStart = time.time()


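# All 47 Japanese prefectures; the index is kept in step with the order of the
# site's search-dropdown option values collected below.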
prefectureNameList = ["北海道", "青森県", "岩手県", "宮城県", "秋田県", "山形県", "福島県", "茨城県", "栃木県", "群馬県", "埼玉県", "千葉県", "東京都", "神奈川県", "新潟県", "富山県", "石川県", "福井県", "山梨県", "長野県", "岐阜県", "静岡県",
                      "愛知県", "三重県", "滋賀県", "京都府", "大阪府", "兵庫県", "奈良県", "和歌山県", "鳥取県", "島根県", "岡山県", "広島県", "山口県", "徳島県", "香川県", "愛媛県", "高知県", "福岡県", "佐賀県", "長崎県", "熊本県", "大分県", "宮崎県", "鹿児島県", "沖縄県"]


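# Accumulators: scraped rows (data), timing-log entries (timeStampData),
# and the per-listing field list (contentAggregator).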
data = []
timeStampData = []
contentAggregator = []
timeStampData.append("프로그램 시작")
timeStampData.append(programStart)
main_url = 'https://stamprally.org/'
programEnd = time.time()
timeStampData.append(programEnd - programStart)
# Timing-log columns: 설명 (description), TimeStamp, 소요기간 (elapsed time).
timeStamp = pd.DataFrame(np.array([timeStampData]), columns=[
                         '설명', 'TimeStamp', '소요기간'])

timeStampData.clear()
timeStampData.append("셀레니엄 드라이버 로딩")
seleniumStart = time.time()
timeStampData.append(seleniumStart)
driver = wd.Chrome(executable_path='chromedriver.exe')
driver.get(main_url)
seleniumEnd = time.time()
timeStampData.append(seleniumEnd - seleniumStart)
rowAddTimeStampSelenium = pd.Series(timeStampData, index=timeStamp.columns)
timeStamp = timeStamp.append(rowAddTimeStampSelenium, ignore_index=True)
timeStampData.clear()
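# Collect the value attribute of every prefecture <option> in the search dropdown.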
prefectureValueStorage = [x.get_attribute('value') for x in driver.find_elements_by_xpath(
    "//select[@name='search_cat1']/option[@class='level-1']")]
prefectureNameIterator = 0
for prefectureValue in prefectureValueStorage:
    prefectureStart = time.time()
    timeStampData.append(prefectureNameList[prefectureNameIterator])
    timeStampData.append(prefectureStart)
    driver.get(
        f"https://stamprally.org/?search_keywords&search_keywords_operator=and&search_cat1={prefectureValue}&search_cat2=0")
    imageDownloadCounter = 1
    totalList = driver.find_element_by_css_selector(
        'div.page_navi2.clearfix>p').text 
    totalListNum = totalList.split("件中")  # split on "件中" ("out of ... items")
    # Number of result pages, at 10 listings per page.
    pageLoopCount = math.ceil(int(totalListNum[0]) / 10)
    currentpage = 0
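    # Walk every result page for this prefecture.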
    while currentpage < pageLoopCount:
        currentpage += 1
        driver.get(
            f"https://stamprally.org/?search_keywords&search_keywords_operator=and&search_cat1={prefectureValue}&search_cat2=0&paged={currentpage}")
        urlList = []
        currentUrlCounter = 0
        listURLContainer = driver.find_elements_by_css_selector(
            '#post_list2 > li > a')
        for url in listURLContainer:
            urlList.append(url.get_attribute('href'))
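        # Open each listing's detail page and scrape its fields.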
        for listURL in listURLContainer:
            contentAggregator = []
            contentAggregator.append(int(totalListNum[0]))
            contentAggregator.append(
                prefectureNameList[prefectureNameIterator])
            contentAggregator.append(
                urlList[currentUrlCounter])
            driver.get(urlList[currentUrlCounter])
            currentUrlCounter += 1
            locationTag = [x.get_attribute('title') for x in driver.find_elements_by_xpath(
                "//*[@id='post_meta_top']/li[1]/a[@class='cat-category']")]
            contentAggregator.append(locationTag)
            eventTag = [x.get_attribute('title') for x in driver.find_elements_by_xpath(
                "//*[@id='post_meta_top']/li[2]/a[@class='cat-category2']")]
            contentAggregator.append(eventTag)
            # "開催期間" = "event period"; the text looks like
            # "開催期間:<start> ~ <end> ( ...".
            availablePeriod = (driver.find_element_by_css_selector(
                'div#post_date')).text.split("( ")
            availablePeriodFormatted = availablePeriod[0].replace("開催期間:", "")
            availableStartDate = availablePeriod[0].split(" ~ ")
            endDate = availableStartDate[1]
            availableStartDateFormatted = availableStartDate[0].replace(
                "開催期間:", "")
            lastUpdatedDate = driver.find_element_by_css_selector(
                'time.entry-date.updated').text
            contentAggregator.append(availablePeriodFormatted)
            contentAggregator.append(availableStartDateFormatted)
            contentAggregator.append(endDate)
            contentAggregator.append(lastUpdatedDate[6:])
            mainImageUrl = driver.find_element_by_css_selector(
                'img.attachment-post-thumbnail.size-post-thumbnail.wp-post-image').get_attribute('src')
            contentAggregator.append(mainImageUrl)
            postTitle1 = driver.find_element_by_css_selector(
                'h2#post_title').text.replace("開催終了", "")  # strip "event ended"
            postTitle = postTitle1.replace("ただいま開催中", "")  # strip "now running"
            removeSpecialChars = postTitle.translate(
                {ord(c): " " for c in "!@#$%^&*()[]{};:,./<>?\|`~-=_+"})
            postTitle = removeSpecialChars
            contentAggregator.append(postTitle)
            eventValidity = driver.find_element_by_xpath(
                "//*[@id='post_title']/span").text
            contentAggregator.append(eventValidity)
            urllib.request.urlretrieve(mainImageUrl, (str(
                prefectureNameList[prefectureNameIterator])+postTitle+str(imageDownloadCounter) + ".png"))
            imageDownloadCounter += 1
            # prefectureNameIterator is advanced once per prefecture at the end of
            # the outer loop (see below); incrementing it here, once per listing,
            # would run past the end of prefectureNameList.
            innerWebSiteButtonURL = driver.find_element_by_css_selector(
                'div.post_content.clearfix > div >a').get_attribute('href')
            contentAggregator.append(innerWebSiteButtonURL)
            mainText = driver.find_elements_by_css_selector(
                'div.post_content.clearfix > p')
            mainContentText = []
            for mainContentDetail in mainText:
                mainContentText.append(mainContentDetail.text)
            mainContextTextCount = len(mainContentText)-1
            contentAggregator.append(mainContentText[:mainContextTextCount])
            contentReorder = [1, 0, 10, 11, 5, 6, 7, 8, 13, 3, 4, 9, 12, 2]
            contentAggregator = [contentAggregator[i] for i in contentReorder]
            data.append(contentAggregator)  # list.append returns None; don't rebind data
            df = pd.DataFrame(data, columns=["Prefecture", "Total List Number", "Title", "Event Validity", "Available Period", "Available StartDate",
                                             "End Date", "Last Updated",  "mainText", "Location Tag", "Event Tag", "Main Image URL", "innerWebSiteURL", "ListLink"])

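            # One key per output column; each value is filled below as a list so
            # pd.DataFrame can build a one-row frame from the dict.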
            contentDict = {
                "Prefecture": "",
                "Total List Number": "",
                "Title": "",
                "Event Validity": "",
                "Available Period": "",
                "Available StartDate": "",
                "End Date": "",
                "Last Updated": "",
                "mainText": "",
                "Location Tag": "",
                "Event Tag": "",
                "Main Image URL": "",
                "innerWebSiteURL": "",
                "ListLink": ""
            }

            print(type(contentAggregator[8]))
            print(type(contentAggregator[9]))
            print(type(contentAggregator[10]))
            for i, k in enumerate(contentDict):
                # type() returns a type object and isinstance() returns a bool, so
                # comparing them is always False; test isinstance() directly.
                if isinstance(contentAggregator[i], list):
                    contentDict[k] = contentAggregator[i]
                else:
                    contentDict[k] = [contentAggregator[i]]
            print(contentAggregator)
            print(contentDict)
            
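            # Append this listing's row to the MySQL table via SQLAlchemy.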
            engine = create_engine("mysql+mysqldb://root:abcdefgH1@localhost/stamprallydb", encoding='utf-8')
            df2 = pd.DataFrame(data=contentDict)
            #df2=df.transpose()
            # to_sql manages its own connection; a separate engine.connect() is not needed.
            df2.to_sql(name='stamprallydb_crawl_result',
                       con=engine, if_exists='append', index=True)

    else:
        # while-else: runs once the page loop finishes normally; log this
        # prefecture's elapsed time and advance to the next prefecture name.
        prefectureEnd = time.time()
        timeStampData.append(prefectureEnd - prefectureStart)
        rowAddTimeStampPrefecture = pd.Series(
            timeStampData, index=timeStamp.columns)
        timeStamp = timeStamp.append(
            rowAddTimeStampPrefecture, ignore_index=True)
        timeStampData.clear()
        prefectureNameIterator += 1

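# Export the accumulated results and the timing log to one Excel workbook.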
excelFileStart = time.time()
xlwriter = pd.ExcelWriter('StampRally_Crawler.xlsx')
df.to_excel(xlwriter, sheet_name="Stamprally.org Crawl Result")
excelFileEnd = time.time()
timeStampData.append("엑셀 파일 저장")
timeStampData.append(excelFileStart)
timeStampData.append(excelFileEnd-excelFileStart)
rowAddTimeStampPrefecture = pd.Series(timeStampData, index=timeStamp.columns)
timeStamp = timeStamp.append(rowAddTimeStampPrefecture, ignore_index=True)
timeStamp.to_excel(xlwriter, sheet_name="TimeStamp Result")

xlwriter.close()
driver.close()
driver.quit()
sys.exit()