Python program does not start again once it finishes


I have a script that I use to scrape certain information from web pages. The script is given below and it works fine.

I am fetching the URLs that need to be searched from a Postgres database. Whenever a URL is searched but returns no results, I move that URL into a table that acts as a queue. Once all URLs have been searched, the program finishes, and I need it to start searching the queued URLs again.

But I am facing an issue: after it prints the elapsed time with print("--- %s seconds ---" % (time.time() - start_time)), the script should start again.

Instead it just stops and never restarts.

Can anyone guide me on this, and also let me know if there is anything else I should improve in this code?

from selenium import webdriver 
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.common.exceptions import TimeoutException
import psycopg2
import os
import glob
import datetime
import time
from time import sleep

start_time = time.time()


final_results=[]
positions=[]
saerched_url=[]

def start_again():
     print('Execution started again.....')
     pass

#def db_connect():
try:
    #Database connection string
     DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5433'"
     #DWH table to which data is ported
     TABLE_NAME = 'staging.search_url'
     #Connecting DB..
     conn = psycopg2.connect(DSN)
     print("Database connected...")
     #conn.set_client_encoding('utf-8')
     cur = conn.cursor()
     ins_cur = conn.cursor()
     cur.execute("SET datestyle='German'")
except (Exception, psycopg2.Error) as error:
     print('database connection failed')
     quit()

def get_products(url):
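    # Scrape the product names shown for this URL; if nothing is found and the
    # URL does not already contain "buy", queue it in staging.no_resulting_urls.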
    product=[]
    print('Passed URL : '+url)
    browser.get(url)
    names = browser.find_elements_by_xpath("//span[@class='pymv4e']")
    product.clear()
    upd_product_name_list=list(filter(None, names))
    product_name = [x.text for x in upd_product_name_list]
    product = [x for x in product_name if len(x.strip()) > 2]
    #print(product)
    if not product and "buy" not in url:
         cmd=''
         cmd="""INSERT into staging.no_resulting_urls(url) SELECT """+ "'"+url+ "'"
         #print(cmd)
         ins_cur.execute(cmd)
         conn.commit()
    upd_product_name_list.clear()
    product_name.clear()
    return product
        


##################################
def get_all_urls():
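     # Fetch the URLs that still have to be searched today, skipping anything
     # already queued in staging.no_resulting_urls.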
     
     search_url_fetch="""WITH CTE AS (SELECT distinct  id,url_to_be_searched,main_category FROM staging.search_url where  
                           url_to_be_searched not  IN( SELECT distinct CASE WHEN searched_url ILIKE '%buy%' THEN REPLACE(searched_url,'+buy','')
                           ELSE  searched_url  END  FROM staging.pla_crawler_results where crawler_date::date=CURRENT_DATE AND url_to_be_searched<>''))
                                       SELECT url_to_be_searched FROM CTE WHERE url_to_be_searched NOT IN(SELECT url FROM staging.no_resulting_urls) order by id"""
     #search_url_fetch="""select url_to_be_searched from test.url where id >130 order by id"""
     #search_url_fetch="""select url_to_be_searched from staging.search_url order by id"""

     psql_cursor = conn.cursor()
     psql_cursor.execute(search_url_fetch)
     serach_url_list_r = psql_cursor.fetchall()
     return serach_url_list_r
print('Fetched DB values')
##################################
#driver_close_variale=0
serach_url_list=get_all_urls()
print("Total urls need to process are :  ", len(serach_url_list))
total_urls=0
total_urls=len(serach_url_list)

for row in serach_url_list:
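    # Start a fresh Chrome session for each URL, scrape it and store the results.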
    sleep(10)
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    #options.add_argument('--headless')
    options.add_argument("—-incognito")
    #browser = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver/', chrome_options=option)
    browser = webdriver.Chrome(executable_path='/users/it/Downloads/chromedrive/chromedriver', chrome_options=options)
    browser.implicitly_wait(30)
    
    #driver_close_variale=driver_close_variale+1
    passed_url=''
    new_url=''
    passed_url=str(row)
    passed_url=passed_url.replace(',)','')
    passed_url=passed_url.replace('(','')
    new_url=passed_url[1:len(passed_url)-1]
    print("\n")

    if total_urls <=10:
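       # When only a few URLs are left, empty the no-result queue and drop today's
       # sparse results (fewer than 4 rows) so those URLs can be searched again.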
       cur.execute("""DELETE FROM staging.no_resulting_urls""")
       print('cleared the no result table succesfully')
       conn.commit()
       
       
       cur.execute("""WITH CTE AS
                        (
                        SELECT DISTINCT searched_url FROM staging.pla_crawler_results 
                        WHERE crawler_date ::DATE=CURRENT_DATE
                        GROUP BY searched_url
                        having COUNT(searched_url) <4
                         )
                        DELETE FROM staging.pla_crawler_results 
                        WHERE crawler_date ::DATE=CURRENT_DATE
                        AND searched_url IN(SELECT searched_url FROM CTE)""") 
       conn.commit()  
               
    filtered=[]
    filtered.clear()
    filtered = get_products(new_url)
    saerched_url.clear()
    if not filtered:
        new_url=new_url+'+buy'
        filtered = get_products(new_url)
        if not filtered:
             browser.close()
             browser.quit()
             pass

    if filtered:
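         # Results were found: record a position for each product and collect the
         # matching company names before writing everything to the database.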
         #print(filtered)
         positions.clear()
         for x in range(1, len(filtered)+1):
           positions.append(str(x))
           saerched_url.append(new_url)
           
         gobal_position=0
         gobal_position=len(positions)
         print('global postion first: '+str(gobal_position))
         print("\n")

         #print(saerched_url)
         company_name_list = browser.find_elements_by_xpath("//div[@class='LbUacb']")
         # use list comprehension to get the actual repo titles and not the selenium objects.
         company = []
         company.clear()
         company = [x.text for x in company_name_list]
         # print out all the titles.
         #print('Company Name:')
         #print(company, '\n')
         # (the code that builds the urls, price and provider lists was elided in the original post)
         print('Final Result: ')
         result = zip(positions,filtered, urls, company,price,saerched_url,provider)
         final_results.clear()
         final_results.append(tuple(result))
         print(final_results)
         print("\n")


         print('global postion end :'+str(gobal_position))
         #print('Driver close variable :'+str(driver_close_variale))
         total_urls=total_urls-1
         i=0
         try:
          for d in final_results:
                
                while i <= gobal_position:
                  print( d[i])
                  cur.execute("""INSERT into staging.pla_crawler_results(position, product_name, url,company,price,searched_url,provider) VALUES (%s, %s, %s,%s, %s,%s,%s)""", d[i])
                  print('Inserted succesfully')
                  conn.commit()
                  i=i+1

               
         except (Exception, psycopg2.Error) as error:
             print (error)
             browser.close()
             browser.quit()
             pass

print("--- %s seconds ---" % (time.time() - start_time))

start_again()

From your code, your start_again function only prints and passes. Did you copy-paste it with the correct indentation? Otherwise this is expected; it does nothing:

def start_again():
    print('Execution started again.....')
    pass

If repeating the whole process is what you want, then you need to wrap your process in its own function, call it inside start_again, and put everything under a def main().

@animalknox Thanks, is there anything else in this code that needs improvement?
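A minimal sketch of that restructuring, assuming the per-URL scraping loop from the question is moved into a helper (process_urls is a hypothetical name, not something from the original code):

import time
from time import sleep

def process_urls():
    # hypothetical wrapper: call get_all_urls() and run the existing
    # per-URL scraping loop from the question inside this function
    ...

def main():
    while True:
        start_time = time.time()
        process_urls()
        print("--- %s seconds ---" % (time.time() - start_time))
        print('Execution started again.....')
        sleep(10)  # short pause before re-reading the queued URLs

if __name__ == '__main__':
    main()

With this layout main() keeps repeating the whole fetch-and-scrape cycle, so the queued URLs are picked up again on the next pass instead of the script simply ending.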