docs-test: Check only http 404 error for external links.

- Raise an exception only for HTTP 404 error responses; for other
  HTTP error codes, use logging to notify devs about wrong links.
This commit is contained in:
K.Kanakhin
2016-10-29 23:54:17 +06:00
committed by Tim Abbott
parent 9aa346db71
commit b248a2c33e
3 changed files with 8 additions and 6 deletions

View File

@@ -18,6 +18,8 @@ class StatusCommand(Command):
self.crawler_process.start()
# Get exceptions quantity from crawler stat data
stats = crawler.stats.get_stats()
if stats.get('spider_exceptions/Exception') or stats.get('downloader/exception_count'):
error_404 = 'downloader/response_status_count/404'
error_io = 'downloader/exception_type_count/exceptions.IOError'
if stats.get(error_404) or stats.get(error_io):
# Return non-zero exit code if exceptions are contained
self.exitcode = 1

View File

@@ -81,12 +81,11 @@ class DocumentationSpider(scrapy.Spider):
# type: (Any) -> Optional[Generator[Any, None, None]]
if hasattr(failure.value, 'response') and failure.value.response:
response = failure.value.response
if response.status in [500, 502, 503, 504, 429]:
self.log("Error! Please check link: {}".format(response), logging.ERROR)
return None
if response.status == 404:
raise Exception('Page not found: {}'.format(response))
if response.status == 405 and response.request.method == 'HEAD':
# Method 'HEAD' not allowed, repeat request with 'GET'
return self.retry_request_with_get(response.request)
raise Exception(failure.value.response)
self.log("Error! Please check link: {}".format(response), logging.ERROR)
else:
raise Exception(failure.value)