diff --git a/tools/documentation_crawler/documentation_crawler/spiders/check_help_documentation.py b/tools/documentation_crawler/documentation_crawler/spiders/check_help_documentation.py index 11babb5d4c..37f8cbd7b5 100644 --- a/tools/documentation_crawler/documentation_crawler/spiders/check_help_documentation.py +++ b/tools/documentation_crawler/documentation_crawler/spiders/check_help_documentation.py @@ -8,37 +8,48 @@ from .common.spiders import BaseDocumentationSpider from typing import Any, List, Set -def get_help_images_dir(help_images_path: str) -> str: +def get_images_dir(images_path: str) -> str: # Get index html file as start url and convert it to file uri dir_path = os.path.dirname(os.path.realpath(__file__)) - target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path) + target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), images_path) return os.path.realpath(target_path) -class HelpDocumentationSpider(BaseDocumentationSpider): - name = "help_documentation_crawler" - start_urls = ['http://localhost:9981/help'] - deny_domains = [] # type: List[str] - deny = ['/privacy'] - help_images_path = "static/images/help" - help_images_static_dir = get_help_images_dir(help_images_path) +class UnusedImagesLinterSpider(BaseDocumentationSpider): + images_path = "" def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) self.static_images = set() # type: Set[str] + self.images_static_dir = get_images_dir(self.images_path) # type: str def _is_external_url(self, url: str) -> bool: - is_external = url.startswith('http') and 'localhost:9981/help' not in url - if self._has_extension(url) and 'localhost:9981/static/images/help' in url: + is_external = url.startswith('http') and self.start_urls[0] not in url + if self._has_extension(url) and 'localhost:9981/{}'.format(self.images_path) in url: self.static_images.add(basename(urlparse(url).path)) return is_external or self._has_extension(url) def closed(self, *args: Any, **kwargs: Any) -> None: - unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images + unused_images = set(os.listdir(self.images_static_dir)) - self.static_images if unused_images: - exception_message = "The following images are not used in help documentation " \ + exception_message = "The following images are not used in documentation " \ "and can be removed: {}" self._set_error_state() unused_images_relatedpath = [ - os.path.join(self.help_images_path, img) for img in unused_images] + os.path.join(self.images_path, img) for img in unused_images] raise Exception(exception_message.format(', '.join(unused_images_relatedpath))) + + +class HelpDocumentationSpider(UnusedImagesLinterSpider): + name = "help_documentation_crawler" + start_urls = ['http://localhost:9981/help'] + deny_domains = [] # type: List[str] + deny = ['/privacy'] + images_path = "static/images/help" + + +class APIDocumentationSpider(UnusedImagesLinterSpider): + name = 'api_documentation_crawler' + start_urls = ['http://localhost:9981/api'] + deny_domains = [] # type: List[str] + images_path = "static/images/api" diff --git a/tools/test-help-documentation b/tools/test-help-documentation index e607ceb489..12d33326c0 100755 --- a/tools/test-help-documentation +++ b/tools/test-help-documentation @@ -26,10 +26,12 @@ LOG_FILE = 'var/help-documentation/server.log' external_host = "localhost:9981" with test_server_running(options.force, external_host, log_file=LOG_FILE, dots=True, use_db=False): - ret = subprocess.call(('scrapy', 'crawl_with_status', 'help_documentation_crawler'), - cwd='tools/documentation_crawler') + ret_help_doc = subprocess.call(('scrapy', 'crawl_with_status', 'help_documentation_crawler'), + cwd='tools/documentation_crawler') + ret_api_doc = subprocess.call(('scrapy', 'crawl_with_status', 'api_documentation_crawler'), + cwd='tools/documentation_crawler') -if ret != 0: +if ret_help_doc != 0 or ret_api_doc != 0: print("\033[0;91m") print("Failed") print("\033[0m") @@ -39,4 +41,4 @@ else: print("\033[0m") -sys.exit(ret) +sys.exit(ret_help_doc or ret_api_doc)