documentation_crawler: Reimplement crawl_with_status so it wraps crawl.

This way we inherit more of the upstream command’s behavior.
Importantly, this means we pass everything in `opts.spargs` to the
spider, not just `opts.spargs["skip_external"]`.
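
For context, the upstream command's run() does roughly the following (a
paraphrase of Scrapy 1.x's scrapy/commands/crawl.py, not a verbatim
copy), which is why wrapping it forwards every spider argument:

    # Sketch of the inherited upstream behavior (paraphrased from
    # Scrapy 1.x's crawl command; not verbatim).
    def run(self, args: List[str], opts: optparse.Values) -> None:
        spname = args[0]  # UsageError checks omitted in this sketch
        self.crawler_process.crawl(spname, **opts.spargs)  # all -a key=value args
        self.crawler_process.start()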

Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
Author:    Anders Kaseorg
Date:      2019-06-19 00:53:02 -07:00
Committer: Tim Abbott
Parent:    a37fa74dce
Commit:    bac63f5a3e


@@ -1,22 +1,20 @@
-from scrapy.commands.crawl import Command
-from scrapy.exceptions import UsageError
-from typing import List, Any
+import optparse
+from scrapy.crawler import Crawler
+from scrapy.commands import crawl
+from typing import List, Union
 
 
-class StatusCommand(Command):
-    def run(self, args: List[str], opts: Any) -> None:
-        if len(args) < 1:
-            raise UsageError()
-        elif len(args) > 1:
-            raise UsageError(
-                "running 'scrapy crawl' with more than one spider is no longer supported")
-        spname = args[0]
-        if len(vars(opts)['spargs']) > 0:
-            skip_external = vars(opts)['spargs']['skip_external']
-        else:
-            skip_external = None
-        crawler = self.crawler_process.create_crawler(spname)
-        self.crawler_process.crawl(crawler, skip_external=skip_external)
-        self.crawler_process.start()
-        if crawler.stats.get_value("log_count/ERROR"):
+class Command(crawl.Command):
+    def run(self, args: List[str], opts: optparse.Values) -> None:
+        crawlers = []
+        real_create_crawler = self.crawler_process.create_crawler
+
+        def create_crawler(crawler_or_spidercls: Union[Crawler, str]) -> Crawler:
+            crawler = real_create_crawler(crawler_or_spidercls)
+            crawlers.append(crawler)
+            return crawler
+
+        self.crawler_process.create_crawler = create_crawler
+        super().run(args, opts)
+        if any(crawler.stats.get_value("log_count/ERROR") for crawler in crawlers):
             self.exitcode = 1
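
The wrapper works by temporarily monkey-patching
self.crawler_process.create_crawler: every Crawler that the inherited
run() creates is recorded in crawlers, so error counts can be checked
after the crawl finishes without duplicating any of the upstream
argument handling.

For reference, a Scrapy project exposes a command like this by pointing
the COMMANDS_MODULE setting at the package containing the module; the
module name becomes the subcommand. A hypothetical invocation (the
module path and spider name below are assumptions, not part of this
commit):

    # settings.py -- COMMANDS_MODULE is a real Scrapy setting; this path
    # is an assumed example.
    COMMANDS_MODULE = "documentation_crawler.commands"

    # crawl_with_status.py then provides the subcommand, e.g.:
    #     scrapy crawl_with_status documentation_crawler -a skip_external=1
    # Every -a key=value pair lands in opts.spargs and reaches the spider.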