tools/documentation_crawler: Use python 3 syntax for typing.
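This commit converts comment-style annotations to inline Python 3 annotations across the documentation crawler: each two-line signature (a `def` line followed by a `# type:` comment) collapses into a single annotated `def` line, with no change in runtime behavior. A minimal sketch of the pattern, using a hypothetical `greet` function that is not part of the commit:

from typing import List

# Before: Python 2-compatible comment annotation
# def greet(names, excited):
#     # type: (List[str], bool) -> str

# After: inline Python 3 annotation; mypy treats both forms the same
def greet(names: List[str], excited: bool) -> str:
    suffix = '!' if excited else '.'
    return ', '.join(names) + suffix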
@@ -4,8 +4,7 @@ from typing import List, Any
 
 
 class StatusCommand(Command):
-    def run(self, args, opts):
-        # type: (List[str], Any) -> None
+    def run(self, args: List[str], opts: Any) -> None:
         if len(args) < 1:
             raise UsageError()
         elif len(args) > 1:
@@ -6,8 +6,7 @@ from typing import List
 from .common.spiders import BaseDocumentationSpider
 
 
-def get_start_url():
-    # type: () -> List[str]
+def get_start_url() -> List[str]:
     # Get index html file as start url and convert it to file uri
     dir_path = os.path.dirname(os.path.realpath(__file__))
     start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
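Both spider modules locate files relative to the repository root with `os.path.join(*[os.pardir] * 4)`, which unpacks the repeated list into four parent-directory components. A standalone illustration of the idiom (the hop count of 4 is specific to this package's depth in the tree):

import os

# [os.pardir] * 4 == ['..', '..', '..', '..'], so the join is '../../../..'
relative = os.path.join(*[os.pardir] * 4)
assert relative == os.sep.join([os.pardir] * 4)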
@@ -8,8 +8,7 @@ from .common.spiders import BaseDocumentationSpider
 from typing import Any, List, Set
 
 
-def get_help_images_dir(help_images_path):
-    # type: (str) -> str
+def get_help_images_dir(help_images_path: str) -> str:
     # Get index html file as start url and convert it to file uri
     dir_path = os.path.dirname(os.path.realpath(__file__))
     target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path)
@@ -24,20 +23,17 @@ class HelpDocumentationSpider(BaseDocumentationSpider):
     help_images_path = "static/images/help"
     help_images_static_dir = get_help_images_dir(help_images_path)
 
-    def __init__(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         super(HelpDocumentationSpider, self).__init__(*args, **kwargs)
         self.static_images = set()  # type: Set
 
-    def _is_external_url(self, url):
-        # type: (str) -> bool
+    def _is_external_url(self, url: str) -> bool:
         is_external = url.startswith('http') and 'localhost:9981/help' not in url
         if self._has_extension(url) and 'localhost:9981/static/images/help' in url:
             self.static_images.add(basename(urlparse(url).path))
         return is_external or self._has_extension(url)
 
-    def closed(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def closed(self, *args: Any, **kwargs: Any) -> None:
         unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images
         if unused_images:
             exception_message = "The following images are not used in help documentation " \
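Note that only signatures are rewritten here: the attribute annotation `self.static_images = set()  # type: Set` keeps its comment form, since PEP 526 variable annotations require Python 3.6+. For comparison, a sketch of the newer spelling (not part of this commit, narrowed to `Set[str]` on the assumption that only basenames, which are strings, are added):

from typing import Set

# comment form, as in the diff:
# static_images = set()  # type: Set

# PEP 526 form (Python 3.6+):
static_images: Set[str] = set()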
@@ -29,29 +29,23 @@ class BaseDocumentationSpider(scrapy.Spider):
     tags = ('a', 'area', 'img')
     attrs = ('href', 'src')
 
-    def __init__(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         super(BaseDocumentationSpider, self).__init__(*args, **kwargs)
         self.has_error = False
 
-    def _set_error_state(self):
-        # type: () -> None
+    def _set_error_state(self) -> None:
         self.has_error = True
 
-    def _has_extension(self, url):
-        # type: (str) -> bool
+    def _has_extension(self, url: str) -> bool:
         return url_has_any_extension(url, self.file_extensions)
 
-    def _is_external_url(self, url):
-        # type: (str) -> bool
+    def _is_external_url(self, url: str) -> bool:
         return url.startswith('http') or self._has_extension(url)
 
-    def check_existing(self, response):
-        # type: (Any) -> None
+    def check_existing(self, response: Any) -> None:
         self.log(response)
 
-    def check_permalink(self, response):
-        # type: (Any) -> None
+    def check_permalink(self, response: Any) -> None:
         self.log(response)
         xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
         m = re.match(r".+\#(?P<permalink>.*)$", response.request.url)  # Get anchor value.
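`check_permalink` pulls the anchor out of the requested URL with a named group: `(?P<permalink>...)` captures everything after the final `#`, because the greedy `.+` backtracks only as far as the last match. A standalone check of that regex against a URL shaped like the ones this spider crawls:

import re

m = re.match(r".+\#(?P<permalink>.*)$",
             "http://localhost:9981/help/#edit-a-message")
assert m is not None and m.group('permalink') == 'edit-a-message'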
@@ -64,8 +58,7 @@ class BaseDocumentationSpider(scrapy.Spider):
             raise Exception(
                 "Permalink #{} is not found on page {}".format(permalink, response.request.url))
 
-    def parse(self, response):
-        # type: (Any) -> Generator[Request, None, None]
+    def parse(self, response: Any) -> Generator[Request, None, None]:
         self.log(response)
         for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
                                       tags=self.tags, attrs=self.attrs, deny=self.deny,
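The three parameters of `Generator[Request, None, None]` are the yield type, the send type, and the return type: `parse` yields `Request` objects and neither accepts values via `send()` nor returns one. The same shape on plain ints:

from typing import Generator

def count_up(limit: int) -> Generator[int, None, None]:
    # yields ints; send and return types are both None
    n = 0
    while n < limit:
        yield n
        n += 1

assert list(count_up(3)) == [0, 1, 2]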
@@ -82,20 +75,17 @@ class BaseDocumentationSpider(scrapy.Spider):
             yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
                           errback=self.error_callback)
 
-    def retry_request_with_get(self, request):
-        # type: (Request) -> Generator[Request, None, None]
+    def retry_request_with_get(self, request: Request) -> Generator[Request, None, None]:
         request.method = 'GET'
         request.dont_filter = True
         yield request
 
-    def exclude_error(self, url):
-        # type: (str) -> bool
+    def exclude_error(self, url: str) -> bool:
         if url in EXCLUDED_URLS:
             return True
         return False
 
-    def error_callback(self, failure):
-        # type: (Any) -> Optional[Generator[Any, None, None]]
+    def error_callback(self, failure: Any) -> Optional[Generator[Any, None, None]]:
         if hasattr(failure.value, 'response') and failure.value.response:
             response = failure.value.response
             if self.exclude_error(response.url):
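One practical gain from the conversion: inline annotations are visible at runtime via `typing.get_type_hints`, whereas `# type:` comments are invisible to the interpreter. A quick check, with a stub body standing in for the real `get_start_url`:

from typing import List, get_type_hints

def get_start_url() -> List[str]:
    return []  # stub; the real function builds a file: URI list

# a comment-annotated function would have empty type hints here
assert get_type_hints(get_start_url) == {'return': List[str]}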