Scrapy-cluster 回调请求不起作用,卡在 meta passthrough 中间件中

发布时间:2020-07-06 06:09

This is the debug panel of Kibana. 我正在尝试使用 scrapy-cluster,但回调请求不起作用。同样的代码在纯 scrapy 上可以正常运行,但在 scrapy-cluster 中不行:无法抓取数据,请求卡在 meta passthrough 中间件中。

class EbayDataSpider(RedisSpider):
    """Scrapy-cluster spider: search eBay for a query string and yield one
    RawResponseItem per product-detail page.

    Crawl flow: parse (home page, grab _trksid) -> parse_link (search
    results, follow each product) -> parse_product_details (build item).
    """
    name = 'ebay_data'

    # Allow a custom parameter (-a flag in the scrapy command)
    def __init__(self, search="iphone 64GB", *args, **kwargs):
        self.search_string = search
        super(EbayDataSpider, self).__init__(*args, **kwargs)

    def parse(self, response):
        """Extract the hidden _trksid field and issue the search request."""
        trksid = response.css("input[type='hidden'][name='_trksid']").xpath(
            "@value").extract()[0]

        # Build the search URL; spaces in the query become '+'
        yield response.follow(url="http://www.ebay.com/sch/i.html?_from=R40&_trksid=" + trksid +
                             "&_nkw=" +
                             self.search_string.replace(
                                 ' ', '+') + "&_sacat=0",
                             callback=self.parse_link)

    # Parse the search results
    def parse_link(self, response):
        """Follow every product link found on the search-results page."""
        # Extract the list of products
        results = response.xpath(
            '//div/div/ul/li[contains(@class, "s-item" )]')

        # BUG FIX: the yield was previously dedented outside this loop, so
        # only the LAST product in the list was ever requested.
        for product in results:
            product_url = product.xpath(
                './/a[@class="s-item__link"]/@href').extract_first()
            # Guard: extract_first() returns None when the link is missing,
            # and response.follow(url=None) would raise.
            if product_url:
                yield response.follow(url=product_url,
                                      callback=self.parse_product_details)

    def parse_product_details(self, response):
        """Build a RawResponseItem from the product-detail page.

        BUG FIX: this method was named 'parse_product_deails' (typo) while
        the callback referenced 'parse_product_details'. The dangling
        callback raised AttributeError, making requests appear stuck in
        the meta-passthrough middleware.
        """
        # capture raw response
        item = RawResponseItem()

        # populated from response.meta (propagated by scrapy-cluster)
        item['appid'] = response.meta['appid']
        item['crawlid'] = response.meta['crawlid']
        item['attrs'] = response.meta['attrs']
        # populated from raw HTTP response
        item["url"] = response.request.url
        item["response_url"] = response.url
        item["status_code"] = response.status
        item["status_msg"] = "OK"
        item["response_headers"] = self.reconstruct_headers(response)
        item["request_headers"] = response.request.headers
        #item["body"] = response.body
        # BUG FIX: placeholder said "amazon spider" — copy-paste leftover.
        item["body"] = "This is empty body from ebay spider"
        item["links"] = []

        # Add more data from details page
        item['p_brand'] = response.xpath(
            "//div[@id='viTabs_0_is']//tbody//tr[1]//td[4]//span/text()").extract()
        item['p_title'] = response.xpath("//h1[@id='itemTitle']/text()").extract()
        item['p_price'] = response.xpath("//span[@id='prcIsum']/text()").extract()

        yield item
回答1