mirror of
https://github.com/zulip/zulip.git
synced 2025-11-06 06:53:25 +00:00
documentation_crawler: Reimplement crawl_with_status so it wraps crawl.
This way we inherit more of the upstream command’s behavior. Importantly, this means we pass everything in `opts.spargs` to the spider, not just `opts.spargs.skip_external`. Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
This commit is contained in:
committed by
Tim Abbott
parent
a37fa74dce
commit
bac63f5a3e
@@ -1,22 +1,20 @@
|
||||
from scrapy.commands.crawl import Command
|
||||
from scrapy.exceptions import UsageError
|
||||
from typing import List, Any
|
||||
import optparse
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.commands import crawl
|
||||
from typing import List, Union
|
||||
|
||||
|
||||
class StatusCommand(Command):
|
||||
def run(self, args: List[str], opts: Any) -> None:
|
||||
if len(args) < 1:
|
||||
raise UsageError()
|
||||
elif len(args) > 1:
|
||||
raise UsageError(
|
||||
"running 'scrapy crawl' with more than one spider is no longer supported")
|
||||
spname = args[0]
|
||||
if len(vars(opts)['spargs']) > 0:
|
||||
skip_external = vars(opts)['spargs']['skip_external']
|
||||
else:
|
||||
skip_external = None
|
||||
crawler = self.crawler_process.create_crawler(spname)
|
||||
self.crawler_process.crawl(crawler, skip_external=skip_external)
|
||||
self.crawler_process.start()
|
||||
if crawler.stats.get_value("log_count/ERROR"):
|
||||
class Command(crawl.Command):
    """Wrapper around the upstream ``crawl`` command that reports logged errors.

    The parent command does the actual crawling; this subclass only records
    every crawler created during the run and sets ``self.exitcode`` to 1 when
    any of them counted at least one ERROR-level log message.
    """

    def run(self, args: List[str], opts: optparse.Values) -> None:
        """Run the parent ``crawl`` command, then derive the exit code from stats.

        Args:
            args: Positional command-line arguments, forwarded unchanged to
                the parent ``run``.
            opts: Parsed command-line options, forwarded unchanged to the
                parent ``run``.
        """
        crawlers: List[Crawler] = []
        real_create_crawler = self.crawler_process.create_crawler

        def create_crawler(crawler_or_spidercls: Union[Crawler, str]) -> Crawler:
            # Delegate to the real factory, remembering each crawler so its
            # stats can be inspected once the run has finished.
            crawler = real_create_crawler(crawler_or_spidercls)
            crawlers.append(crawler)
            return crawler

        # Assumes the parent ``run`` obtains its crawlers through
        # ``self.crawler_process.create_crawler``; the wrapper is installed
        # only for the duration of that call.
        self.crawler_process.create_crawler = create_crawler
        try:
            super().run(args, opts)
        finally:
            # Always put the original factory back, even if the parent raised,
            # so the recording wrapper does not outlive this run.
            self.crawler_process.create_crawler = real_create_crawler

        if any(crawler.stats.get_value("log_count/ERROR") for crawler in crawlers):
            self.exitcode = 1
|
||||
|
||||
Reference in New Issue
Block a user