Python 从脚本运行scrapy Spider时,如何将用户定义的参数传递给scrapy Spider,python,scrapy,Python,Scrapy,与之类似,我尝试运行一个spider,其中一个参数(start_url)是用户定义的。但是,我不希望从命令行运行scrapy,而是希望从脚本中运行它。到目前为止,我掌握的代码是: import scrapy from scrapy.spiders import CrawlSpider, Rule from scrapy.linkextractors import LinkExtractor from scrapy.crawler import CrawlerProcess class Fun



import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.crawler import CrawlerProcess

class FundaMaxPagesSpider(CrawlSpider):
    """Crawl spider that records the highest page number among paginated links.

    Links matching ``le_maxpage`` are handed to :meth:`get_max_page_number`,
    which prints its findings, writes the maximum page number to
    ``<place_name>_max_pages.txt`` and yields it as an item.
    """

    name = "Funda_max_pages"
    allowed_domains = [""]
    start_urls = [""]

    # Link to a page containing thumbnails of several houses, such as
    # NOTE(review): this pattern is built once, at class-definition time, from
    # the class-level ``start_urls``; a URL supplied to an instance later does
    # not change it.
    # NOTE(review): the ``+`` quantifies the last character of the URL and the
    # URL is not regex-escaped -- confirm that this is intended.
    le_maxpage = LinkExtractor(allow=r'%s+p\d+' % start_urls[0])

    rules = (
        Rule(le_maxpage, callback='get_max_page_number'),
    )

    def get_max_page_number(self, response):
        """Determine the largest page number linked from ``response``.

        Only links with exactly six slashes and a trailing slash are
        considered; the second-to-last path segment (for example ``p10``)
        supplies the page number and the third-to-last the place name.

        Args:
            response: The response whose links are extracted with
                ``le_maxpage``.

        Yields:
            dict: ``{'max_page_number': <int>}``; the value is 0 when no link
            qualifies.
        """
        max_page_number = 0     # Initialize the maximum page number
        place_name = None       # Stays None until a qualifying link is seen
        for link in self.le_maxpage.extract_links(response):
            # Select only pages with a link depth of 3
            if link.url.count('/') != 6 or not link.url.endswith('/'):
                continue
            print("The link is %s" % link.url)
            segments = link.url.split("/")
            try:
                page_number = int(segments[-2].strip('p'))
            except ValueError:
                # The segment is not of the form "p<digits>"; skip this link
                # rather than abort the whole callback.
                continue
            # Take the place name from a qualifying link only, never from
            # whatever link happened to be iterated last.
            place_name = segments[-3]
            if page_number > max_page_number:
                max_page_number = page_number
        print("The maximum page number is %s" % max_page_number)
        if place_name is not None:
            print("The place name is %s" % place_name)
            # File name with as prefix the place name
            filename = str(place_name) + "_max_pages.txt"
            # Text mode: the payload is a str, which a binary-mode file
            # rejects on Python 3.
            with open(filename, 'w') as f:
                f.write('max_page_number = %s' % max_page_number)
        yield {'max_page_number': max_page_number}

# Create the crawler process with a fixed user-agent setting.
process = CrawlerProcess({
    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
})

# A spider has to be scheduled before start(); otherwise there is nothing to
# crawl.
process.crawl(FundaMaxPagesSpider)
process.start()  # the script will block here until the crawling is finished
start_url

# `scrapy crawl` selects a spider by its `name` attribute ("Funda_max_pages"),
# not by its class name; `-a` passes a spider argument.
scrapy crawl Funda_max_pages -a url=''

# Schedule the spider on the crawler process, supplying `url` as a keyword
# argument.
# NOTE(review): presumably the keyword is forwarded to the spider's
# constructor, as Scrapy documents for crawl() -- confirm.
process.crawl(FundaMaxPagesSpider, url='')

def __init__(self, url='', *args, **kwargs):
    """Initialise the spider with a caller-supplied start URL.

    Args:
        url: The URL to crawl first; it becomes the only entry of
            ``start_urls``.
        *args: Forwarded unchanged to the base-class initialiser.
        **kwargs: Forwarded unchanged to the base-class initialiser, so any
            other spider arguments keep working.
    """
    # Run the base initialiser so the parent class completes its own setup;
    # the explicit two-argument super() form keeps working regardless of where
    # this method is attached to the class.
    super(FundaMaxPagesSpider, self).__init__(*args, **kwargs)
    self.start_urls = [url]