Python 程序运行结束后不会重新启动
我有一个脚本,用来从网页上抓取某些信息。脚本如下所示,运行良好。我从 Postgres 数据库中获取需要搜索的 URL。如果某个 URL 搜索后没有得到任何结果,我就把该 URL 移到一张用作队列的表中。所有 URL 搜索完之后,程序就结束了,而我需要它重新开始,去搜索队列中的 URL。但我遇到了一个问题:在执行完 print("--- %s seconds ---" % (time.time() - start_time)) 和 start_again() 之后,程序就停止了,并没有重新启动。有谁能在这方面给我一些指导,并告诉我这段代码还有哪些地方需要改进?
标签:python, python-3.x, selenium-webdriver, selenium-chromedriver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import psycopg2
import os
import glob
import datetime
import time
from time import sleep
# Wall-clock time at script start; read at the end of the script to print the total runtime.
start_time = time.time()
# Result rows for the URL currently being processed (cleared and refilled by the main loop).
final_results=[]
# Rank strings ("1", "2", ...) for the products found for the current URL.
positions=[]
# The searched URL, appended once per product found (identifier spelling kept: other code refers to it).
saerched_url=[]
def start_again():
    """Print a message announcing that execution is starting again.

    NOTE(review): this function only prints; it does not re-run the crawl.
    To actually repeat the work, the crawl has to live in its own function
    that is called from here (or from a loop in a ``main()``).
    """
    print('Execution started again.....')
# Open the PostgreSQL connection and the two cursors shared by the rest of the
# script. Runs at import time; the script cannot do anything without the DB,
# so a connection failure terminates the process.
try:
    # Database connection string.
    # NOTE(review): credentials are hard-coded; consider reading them from the
    # environment or a config file instead of keeping them in source.
    DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5433'"
    # DWH table to which data is ported
    TABLE_NAME = 'staging.search_url'
    # Connecting DB..
    conn = psycopg2.connect(DSN)
    print("Database connected...")
    cur = conn.cursor()      # general-purpose cursor used by the main loop
    ins_cur = conn.cursor()  # cursor used by get_products() for its INSERT
    cur.execute("SET datestyle='German'")
except psycopg2.Error as error:
    # Report the reason for the failure and exit with a non-zero status so
    # callers (cron, shell scripts) can tell that the run did not happen.
    print('database connection failed')
    print(error)
    raise SystemExit(1)
def get_products(url):
    """Load *url* in the shared browser and return the product names found.

    Uses the module-level ``browser`` (Selenium WebDriver), ``ins_cur``
    (psycopg2 cursor) and ``conn`` (psycopg2 connection).

    Args:
        url: Address of the page to load.

    Returns:
        A list with the text of every ``<span class="pymv4e">`` element whose
        stripped text is longer than two characters, in page order. The list
        is empty when no such element is found.

    Side effects:
        When no product is found and ``"buy"`` does not occur in *url*, the
        URL is inserted into ``staging.no_resulting_urls`` and committed.
    """
    print('Passed URL : ' + url)
    browser.get(url)
    # find_elements(By.XPATH, ...) replaces the find_elements_by_xpath helper,
    # which newer Selenium releases no longer provide.
    names = browser.find_elements(By.XPATH, "//span[@class='pymv4e']")
    product_name = [element.text for element in names if element]
    # Drop empty / near-empty labels (two characters or fewer after stripping).
    product = [text for text in product_name if len(text.strip()) > 2]
    if not product and "buy" not in url:
        # Pass the URL as a bound parameter: building the statement by string
        # concatenation breaks on quotes in the URL and allows SQL injection.
        ins_cur.execute(
            "INSERT INTO staging.no_resulting_urls(url) VALUES (%s)",
            (url,),
        )
        conn.commit()
    return product
##################################
def get_all_urls():
    """Fetch the URLs that still have to be searched.

    Runs the query below on the module-level ``conn``: it selects
    ``url_to_be_searched`` from ``staging.search_url``, leaving out URLs that
    appear in today's ``staging.pla_crawler_results`` (with a ``+buy`` suffix
    removed) and URLs listed in ``staging.no_resulting_urls``, ordered by id.

    Returns:
        The ``fetchall()`` result: a list of one-element row tuples, each
        holding one URL.
    """
    search_url_fetch = """WITH CTE AS (SELECT distinct id,url_to_be_searched,main_category FROM staging.search_url where
url_to_be_searched not IN( SELECT distinct CASE WHEN searched_url ILIKE '%buy%' THEN REPLACE(searched_url,'+buy','')
ELSE searched_url END FROM staging.pla_crawler_results where crawler_date::date=CURRENT_DATE AND url_to_be_searched<>''))
SELECT url_to_be_searched FROM CTE WHERE url_to_be_searched NOT IN(SELECT url FROM staging.no_resulting_urls) order by id"""
    # The context manager closes the cursor once the rows have been read.
    # execute() is called without parameters, so the '%' characters in the
    # ILIKE pattern are sent to the server unchanged.
    with conn.cursor() as psql_cursor:
        psql_cursor.execute(search_url_fetch)
        serach_url_list_r = psql_cursor.fetchall()
    # Printed before returning; after the return it could never run.
    print('Fetched DB values')
    return serach_url_list_r
##################################
# Main crawl script: fetch the pending URLs, then for each one start a Chrome
# session, scrape product and company data and insert the rows into
# staging.pla_crawler_results.
# NOTE(review): indentation is not visible in this copy of the script, so the
# nesting described in the comments below is presumed from statement order.
#driver_close_variale=0
serach_url_list=get_all_urls()
print("Total urls need to process are : ", len(serach_url_list))
# Count of URLs still to be processed; decremented further down.
total_urls=0
total_urls=len(serach_url_list)
for row in serach_url_list:
# Wait 10 seconds before handling each URL.
sleep(10)
# A new Chrome instance is configured and launched for every URL.
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
#options.add_argument('--headless')
# NOTE(review): the argument below starts with an em dash followed by a hyphen,
# not two hyphens; presumably "--incognito" was intended - confirm.
options.add_argument("—-incognito")
#browser = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver/', chrome_options=option)
browser = webdriver.Chrome(executable_path='/users/it/Downloads/chromedrive/chromedriver', chrome_options=options)
browser.implicitly_wait(30)
#driver_close_variale=driver_close_variale+1
# Recover the URL text from the row by stringifying it and stripping "(",
# ",)" and the first and last remaining characters (presumably the quotes).
# NOTE(review): row is a fetchall() row, so row[0] would presumably give the
# URL directly - confirm.
passed_url=''
new_url=''
passed_url=str(row)
passed_url=passed_url.replace(',)','')
passed_url=passed_url.replace('(','')
new_url=passed_url[1:len(passed_url)-1]
print("\n")
# When 10 or fewer URLs remain, empty the no-result table.
if total_urls <=10:
cur.execute("""DELETE FROM staging.no_resulting_urls""")
print('cleared the no result table succesfully')
conn.commit()
# Delete today's result rows for every searched_url that has fewer than 4 rows today.
# NOTE(review): whether this runs only inside the `if` above cannot be told from this copy.
cur.execute("""WITH CTE AS
(
SELECT DISTINCT searched_url FROM staging.pla_crawler_results
WHERE crawler_date ::DATE=CURRENT_DATE
GROUP BY searched_url
having COUNT(searched_url) <4
)
DELETE FROM staging.pla_crawler_results
WHERE crawler_date ::DATE=CURRENT_DATE
AND searched_url IN(SELECT searched_url FROM CTE)""")
conn.commit()
# First attempt: the URL as stored.
filtered=[]
filtered.clear()
filtered = get_products(new_url)
saerched_url.clear()
# Second attempt: retry with '+buy' appended to the URL.
if not filtered:
new_url=new_url+'+buy'
filtered = get_products(new_url)
# Still nothing: close the browser.
# NOTE(review): browser.close()/quit() is called again near the end of this
# block; check whether that second call can hit an already-closed session.
if not filtered:
browser.close()
browser.quit()
pass
if filtered:
#print(filtered)
# Build "1".."N" rank strings and record the searched URL once per product.
positions.clear()
for x in range(1, len(filtered)+1):
positions.append(str(x))
saerched_url.append(new_url)
gobal_position=0
gobal_position=len(positions)
print('global postion first: '+str(gobal_position))
print("\n")
#print(saerched_url)
company_name_list = browser.find_elements_by_xpath("//div[@class='LbUacb']")
# use list comprehension to get the actual repo titles and not the selenium objects.
company = []
company.clear()
company = [x.text for x in company_name_list]
# print out all the titles.
#print('Company Name:')
#print(company, '\n')
# NOTE(review): the "-" lines below appear to stand in for code missing from
# this copy; `urls`, `price` and `provider`, used in the zip() further down,
# are not assigned anywhere in the visible script.
-
-
-
-
-
print('Final Result: ')
# Combine the per-product columns into row tuples; final_results then holds a
# single tuple containing all of those rows.
result = zip(positions,filtered, urls, company,price,saerched_url,provider)
final_results.clear()
final_results.append(tuple(result))
print(final_results)
print("\n")
print('global postion end :'+str(gobal_position))
#print('Driver close variable :'+str(driver_close_variale))
total_urls=total_urls-1
i=0
try:
for d in final_results:
# NOTE(review): `i <= gobal_position` lets i reach len(positions); if d holds
# exactly that many rows, d[i] raises IndexError on the last pass, which the
# except below prints - verify whether `<` was intended.
while i <= gobal_position:
print( d[i])
cur.execute("""INSERT into staging.pla_crawler_results(position, product_name, url,company,price,searched_url,provider) VALUES (%s, %s, %s,%s, %s,%s,%s)""", d[i])
print('Inserted succesfully')
conn.commit()
i=i+1
except (Exception, psycopg2.Error) as error:
print (error)
browser.close()
browser.quit()
pass
# Report the total runtime measured from start_time.
print("--- %s seconds ---" % (time.time() - start_time))
# start_again() as defined in this file only prints a message, so the crawl is
# not repeated after this call.
start_again()
从您的代码来看,start_again 函数只是打印一条消息,然后 pass。您复制粘贴时缩进是否正确?如果缩进没有问题,那么这是正常的——它什么也不做:
def start_again(): print('Execution started again.....'); pass
如果您想要的是重复整个过程,那么需要把整个处理流程封装到它自己的函数中,在 start_again 中调用该函数,然后把所有内容放进 def main(): 中。
@animalknox 谢谢,这段代码中还有什么需要改进的吗?