
Operand should contain 1 column(s) error - to_sql() Python MySQL


I can't seem to figure out the error below.

I'm basically crawling a series of websites and want to insert the scraped rows into MySQL each time a site is visited. Printing to an Excel sheet works fine as expected, but writing to MySQL fails with an OperationalError: "Operand should contain 1 column(s)".

I added a few lines to check and found that the DataFrame has 1 row and 13 columns, which may be the cause of the "Operand should contain 1 column(s)" error.

The line that produces the error is:

df.to_sql(name='stamprallydb_crawl_result',
          con=engine, if_exists='append', index=False)
It would help to start reading the code from there.
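For reference, here is a minimal, self-contained sketch of the same failure mode, assuming the cause is a list-valued DataFrame cell; the connection string, database and table name are placeholders and are not taken from the code below:

import pandas as pd
from sqlalchemy import create_engine

# Placeholder connection string, not the one used in the question
engine = create_engine(
    "mysql+mysqldb://user:password@localhost/testdb", encoding='utf-8')

df = pd.DataFrame([[1, ["tag_a", "tag_b"]]], columns=["id", "tags"])

# A list-valued cell is escaped by pymysql as a parenthesised group,
# so MySQL raises OperationalError 1241: Operand should contain 1 column(s)
# df.to_sql(name='demo_table', con=engine, if_exists='append', index=False)

# Joining the list into a plain string avoids the nested operand
df["tags"] = df["tags"].apply(
    lambda v: ", ".join(map(str, v)) if isinstance(v, (list, tuple)) else v)
df.to_sql(name='demo_table', con=engine, if_exists='append', index=False)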

The full code is below. Thanks!

import pymysql
from sqlalchemy import create_engine
pymysql.install_as_MySQLdb()
from Stamprally import StamprallyInfo
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
import os
import sys
import time
import math
import urllib.request
import numpy as np
import pandas as pd
import re
import MySQLdb



# Start Recording Program Loading Time
programStart = time.time()

prefectureNameList = ["北海道", "青森県", "岩手県", "宮城県", "秋田県", "山形県", "福島県", "茨城県", "栃木県", "群馬県", "埼玉県", "千葉県", "東京都", "神奈川県", "新潟県", "富山県", "石川県", "福井県", "山梨県", "長野県", "岐阜県", "静岡県",
                      "愛知県", "三重県", "滋賀県", "京都府", "大阪府", "兵庫県", "奈良県", "和歌山県", "鳥取県", "島根県", "岡山県", "広島県", "山口県", "徳島県", "香川県", "愛媛県", "高知県", "福岡県", "佐賀県", "長崎県", "熊本県", "大分県", "宮崎県", "鹿児島県", "沖縄県"]

#prefectureNameList = ["東京都"]

data = []
timeStampData = []

#timeStampDataAggregate = []
contentAggregator = []
timeStampData.append("프로그램 시작")
timeStampData.append(programStart)
# db = Db()
main_url = 'https://stamprally.org/'
# List to hold the stamp rally information
# stamprally_list = []
# Load the web driver
# For MAC users driver = wd.Chrome(executable_path='./chromedriver')
# End Recording Program Loading Time
programEnd = time.time()
timeStampData.append(programEnd - programStart)
# timeStampDataAggregate.append(timeStampData)

timeStamp = pd.DataFrame(np.array([timeStampData]), columns=[
                         '설명', 'TimeStamp', '소요기간'])


print(timeStamp)

timeStampData.clear()
print(timeStampData)
# Start Recording Selenium Driver Loading
print("셀레니엄 ")
timeStampData.append("셀레니엄 드라이버 로딩")
seleniumStart = time.time()
timeStampData.append(seleniumStart)
driver = wd.Chrome(executable_path='chromedriver.exe')
driver.get(main_url)
seleniumEnd = time.time()
timeStampData.append(seleniumEnd - seleniumStart)
print(timeStampData)
rowAddTimeStampSelenium = pd.Series(timeStampData, index=timeStamp.columns)
timeStamp = timeStamp.append(rowAddTimeStampSelenium, ignore_index=True)
# timeStampDataAggregate.append(timeStampData)
timeStampData.clear()

# =============================================================================================
prefectureValueStorage = [x.get_attribute('value') for x in driver.find_elements_by_xpath(
    "//select[@name='search_cat1']/option[@class='level-1']")]
# =============================================================================================
prefectureNameIterator = -1
print("START OF PREFECTURE " + prefectureNameList[prefectureNameIterator])
# Loop through all the different prefectures
for prefectureValue in prefectureValueStorage:
    print("Prefecture Start Time")
    prefectureStart = time.time()
    prefectureNameIterator += 1
    # Add Prefecture Name to timeStampData
    timeStampData.append(prefectureNameList[prefectureNameIterator])
    timeStampData.append(prefectureStart)
    print(timeStampData)
    driver.get(
        f"https://stamprally.org/?search_keywords&search_keywords_operator=and&search_cat1={prefectureValue}&search_cat2=0")


# Calculate How Many Times To Run Page Loop
    imageDownloadCounter = 1
    totalList = driver.find_element_by_css_selector(
        'div.page_navi2.clearfix>p').text  # get_attribute('text')
    totalListNum = totalList.split("件中")
    # Add TotalListNum to the contentAggregator
    # contentAggregator.append(int(totalListNum[0]))
    if int(totalListNum[0]) % 10 != 0:
        pageLoopCount = math.ceil((int(totalListNum[0])/10))
    else:
        pageLoopCount = int(totalListNum[0])/10
    # continue
    currentpage = 0
    while currentpage < pageLoopCount:
        currentpage += 1
        print("Current Page " + str(currentpage))
# # Loop through all the Listings within the prefecture page
        driver.get(
            f"https://stamprally.org/?search_keywords&search_keywords_operator=and&search_cat1={prefectureValue}&search_cat2=0&paged={currentpage}")
        # print("Loading Page %s" % currentpage)
        # ========================================================================================================================================================
        # Add prefectureName to the contentAggregator
        # contentAggregator.append(prefectureNameList[prefectureNameIterator])
        # Gather All List Links
        urlList = []
        currentUrlCounter = 0
        listURLContainer = driver.find_elements_by_css_selector(
            '#post_list2 > li > a')
        # Put all the lists in one Array
        for url in listURLContainer:
            urlList.append(url.get_attribute('href'))
        # Loop through all the links
        for listURL in listURLContainer:
            contentAggregator = []
    # Add TotalListNum to the contentAggregator
            contentAggregator.append(int(totalListNum[0]))
    # Add prefectureName to the contentAggregator
            contentAggregator.append(
                prefectureNameList[prefectureNameIterator])
            #print('article Link: ')
            # print(urlList[currentUrlCounter])
        # Add listLink to the contentAggregator
            contentAggregator.append(
                urlList[currentUrlCounter])
        # for Each Links in listURLContainer:
            driver.get(urlList[currentUrlCounter])
            currentUrlCounter += 1
            locationTag = [x.get_attribute('title') for x in driver.find_elements_by_xpath(
                "//*[@id='post_meta_top']/li[1]/a[@class='cat-category']")]
            # print(locationTag)
        # Add locationTag to the contentAggregator
            contentAggregator.append(locationTag)

            eventTag = [x.get_attribute('title') for x in driver.find_elements_by_xpath(
                "//*[@id='post_meta_top']/li[2]/a[@class='cat-category2']")]
        # Add eventTag to the contentAggregator
            contentAggregator.append(eventTag)

        # Select, Post Date & Remove Non-Text Variable
            availablePeriod = (driver.find_element_by_css_selector(
                'div#post_date')).text.split("( ")
            availablePeriodFormatted = availablePeriod[0].replace("開催期間:", "")
            availableStartDate = availablePeriod[0].split(" ~ ")
            endDate = availableStartDate[1]
            availableStartDateFormatted = availableStartDate[0].replace(
                "開催期間:", "")
        # Select Latest Update Date
            lastUpdatedDate = driver.find_element_by_css_selector(
                'time.entry-date.updated').text
            #print("Available Period:")
            # print(availablePeriodFormatted)
        # Add Available Period to the contentAggregator
            contentAggregator.append(availablePeriodFormatted)
            #print("Available StartDate:")
            # print(availableStartDateFormatted)
        # Add Available StartDate to the contentAggregator
            contentAggregator.append(availableStartDateFormatted)
            #print("End Date: ")
            # print(endDate)
        # Add endDate to the contentAggregator
            contentAggregator.append(endDate)
            #print("Last Updated:")
            # print(lastUpdatedDate[6:])
        # Add lastUpdatedDate to the contentAggregator
            contentAggregator.append(lastUpdatedDate[6:])
    # ========================================================================================================================================================
        # Download Main Post Image
            mainImageUrl = driver.find_element_by_css_selector(
                'img.attachment-post-thumbnail.size-post-thumbnail.wp-post-image').get_attribute('src')
        # Add lastUpdatedDate to the contentAggregator
            contentAggregator.append(mainImageUrl)
        # Save Post Main Title
            # postTitle = driver.find_element_by_xpath(
            #     "//*[@id='post_title']/text()")
            postTitle1 = driver.find_element_by_css_selector(
                'h2#post_title').text.replace("開催終了", "")
            postTitle = postTitle1.replace("ただいま開催中", "")
        # Replace all special characters in a string with empty string
            # Pass the string in search

            # specialCharacter = "!@#$%^&*()-+?_=,<>/"
            # if any(character in specialCharacter for character in postTitle):
            removeSpecialChars = postTitle.translate(
                {ord(c): " " for c in "!@#$%^&*()[]{};:,./<>?\|`~-=_+"})
            postTitle = removeSpecialChars
            print(postTitle)
            # else:
            #     continue

            # Add Title to the contentAggregator
            contentAggregator.append(postTitle)
            #print("Title: ")
            # print(postTitle)
        # Find out Event validty status
            eventValidity = driver.find_element_by_xpath(
                "//*[@id='post_title']/span").text

        # Add eventValidity to the contentAggregator
            contentAggregator.append(eventValidity)

        # Save Post Main Image
            urllib.request.urlretrieve(mainImageUrl, (str(
                prefectureNameList[prefectureNameIterator])+postTitle+str(imageDownloadCounter) + ".png"))
            imageDownloadCounter += 1
        # Get Inner Website Link
            innerWebSiteButtonURL = driver.find_element_by_css_selector(
                'div.post_content.clearfix > div >a').get_attribute('href')
            #print("inner Website Button URL: " + innerWebSiteButtonURL)
        # Add innerWebSiteURL to the contentAggregator
            contentAggregator.append(innerWebSiteButtonURL)
        # Gather Main Post Text Content
            mainText = driver.find_elements_by_css_selector(
                'div.post_content.clearfix > p')
            mainContentText = []
        # Remove Disclamimer text
            for mainContentDetail in mainText:
                mainContentText.append(mainContentDetail.text)
            mainContextTextCount = len(mainContentText)-1
            # print(mainContentText[:mainContextTextCount])
        # Add Main Post Text Content to the contentAggregator
            contentAggregator.append(mainContentText[:mainContextTextCount])

    # ========================================================================================================================================================
            contentReorder = [1, 0, 10, 11, 5, 6, 7, 8, 13, 3, 4, 9, 12, 2]
            contentAggregator = [contentAggregator[i] for i in contentReorder]
            print("=====================================================================================================================================================")
            # print(contentAggregator)
            data.append(contentAggregator)
            # print(data)
            # print(pd.DataFrame(data, columns=["Total List Number", "Prefecture", "ListLink", "Location Tag", "Event Tag", "Available Period",
            #                                   "Available StartDate", "End Date", "Last Updated", "Main Image URL", "Title","Event Validty" "innerWebSiteURL", "mainText"]))
            df = pd.DataFrame(data, columns=["Prefecture", "Total List Number", "Title", "Event Validity", "Available Period", "Available StartDate",
                                             "End Date", "Last Updated",  "mainText", "Location Tag", "Event Tag", "Main Image URL", "innerWebSiteURL", "ListLink"])

            # Debug: print the raw rows gathered so far
            df2 = pd.DataFrame(data)
            print(df2)
            # try:
            engine = create_engine(
                "mysql+mysqldb://root:abcdefgH1@localhost/stamprallydb", encoding='utf-8')
            conn = engine.connect()
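            # Note: the "Location Tag", "Event Tag" and "mainText" columns of df
            # hold Python lists (locationTag, eventTag and
            # mainContentText[:mainContextTextCount] above); pymysql escapes a
            # list as a parenthesised group, which MySQL rejects with
            # "Operand should contain 1 column(s)". A possible fix (a sketch,
            # not tested against this schema) is to flatten them first:
            # for col in ["Location Tag", "Event Tag", "mainText"]:
            #     df[col] = df[col].apply(lambda v: ", ".join(map(str, v)))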
            df.to_sql(name='stamprallydb_crawl_result',
                      con=engine, if_exists='append', index=False)
            # except:
            #     print("Error in Mysql connection")
    else:
        prefectureEnd = time.time()
        timeStampData.append(prefectureEnd-prefectureStart)
        rowAddTimeStampPrefecture = pd.Series(
            timeStampData, index=timeStamp.columns)
        timeStamp = timeStamp.append(
            rowAddTimeStampPrefecture, ignore_index=True)
        timeStampData.clear()
        # timeStampDataAggregate.append(timeStampData)

excelFileStart = time.time()
xlwriter = pd.ExcelWriter('StampRally_Crawler.xlsx')
df.to_excel(xlwriter, sheet_name="Stamprally.org Crawl Result")
# Add last Series "엑셀 파일 저장"
excelFileEnd = time.time()
timeStampData.append("엑셀 파일 저장")
timeStampData.append(excelFileStart)
timeStampData.append(excelFileEnd-excelFileStart)
rowAddTimeStampPrefecture = pd.Series(timeStampData, index=timeStamp.columns)
timeStamp = timeStamp.append(rowAddTimeStampPrefecture, ignore_index=True)
# timeStampDataAggregate.append(timeStampData)
# Create New sheet and write to Excel
timeStamp.to_excel(xlwriter, sheet_name="TimeStamp Result")

xlwriter.close()

# Close  Off
driver.close()
driver.quit()
sys.exit()