tools/documentation_crawler: Use python 3 syntax for typing.

Author: rht
Date: 2017-10-26 11:40:25 +02:00
Committed by: Tim Abbott
Parent: 2aefeb723e
Commit: e296841447
4 changed files with 16 additions and 32 deletions
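
The commit mechanically rewrites PEP 484 comment-style annotations into Python 3 function annotations; both spell the same types, and mypy checks them identically. A minimal sketch of the pattern, using a hypothetical `greet` function rather than any of the crawler's code:

    from typing import List

    # Before: Python 2-compatible comment annotation (PEP 484).
    def greet_comment_style(names, excited):
        # type: (List[str], bool) -> str
        suffix = '!' if excited else '.'
        return 'Hello, ' + ', '.join(names) + suffix

    # After: Python 3 annotation syntax, as applied throughout this commit.
    def greet(names: List[str], excited: bool) -> str:
        suffix = '!' if excited else '.'
        return 'Hello, ' + ', '.join(names) + suffix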

View File

@@ -4,8 +4,7 @@ from typing import List, Any
 class StatusCommand(Command):
-    def run(self, args, opts):
-        # type: (List[str], Any) -> None
+    def run(self, args: List[str], opts: Any) -> None:
         if len(args) < 1:
             raise UsageError()
         elif len(args) > 1:
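
One practical gain from the new syntax: annotations are visible at runtime via `__annotations__`, whereas `# type:` comments are invisible to the interpreter. A standalone sketch (a bare `run` stub for illustration, not the Command method above):

    from typing import Any, List

    def run(args: List[str], opts: Any) -> None:
        pass

    print(run.__annotations__)
    # {'args': typing.List[str], 'opts': typing.Any, 'return': None}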

View File

@@ -6,8 +6,7 @@ from typing import List
 from .common.spiders import BaseDocumentationSpider

-def get_start_url():
-    # type: () -> List[str]
+def get_start_url() -> List[str]:
     # Get index html file as start url and convert it to file uri
     dir_path = os.path.dirname(os.path.realpath(__file__))
     start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
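
For context, `get_start_url` builds a `file://` start URL from a path four directories above this module. A simplified sketch of the same idea, assuming a hypothetical `index.html` target (the real file name is outside this hunk):

    import os
    from pathlib import Path
    from typing import List

    def get_start_url() -> List[str]:
        # Walk four levels up from this module, then point at an index page.
        dir_path = os.path.dirname(os.path.realpath(__file__))
        start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
                                  'index.html')
        # Resolve to an absolute path and convert it to a file:// URI.
        return [Path(start_file).resolve().as_uri()]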

View File

@@ -8,8 +8,7 @@ from .common.spiders import BaseDocumentationSpider
 from typing import Any, List, Set

-def get_help_images_dir(help_images_path):
-    # type: (str) -> str
+def get_help_images_dir(help_images_path: str) -> str:
     # Get index html file as start url and convert it to file uri
     dir_path = os.path.dirname(os.path.realpath(__file__))
     target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path)
@@ -24,20 +23,17 @@ class HelpDocumentationSpider(BaseDocumentationSpider):
     help_images_path = "static/images/help"
     help_images_static_dir = get_help_images_dir(help_images_path)

-    def __init__(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         super(HelpDocumentationSpider, self).__init__(*args, **kwargs)
         self.static_images = set()  # type: Set

-    def _is_external_url(self, url):
-        # type: (str) -> bool
+    def _is_external_url(self, url: str) -> bool:
         is_external = url.startswith('http') and 'localhost:9981/help' not in url
         if self._has_extension(url) and 'localhost:9981/static/images/help' in url:
             self.static_images.add(basename(urlparse(url).path))
         return is_external or self._has_extension(url)

-    def closed(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def closed(self, *args: Any, **kwargs: Any) -> None:
         unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images
         if unused_images:
             exception_message = "The following images are not used in help documentation " \
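
Note that `self.static_images = set()  # type: Set` keeps the comment form: this commit only converts function signatures. On Python 3.6+, PEP 526 variable annotations would allow the same thing inline; a sketch using a hypothetical, more precise `Set[str]`:

    from typing import Set

    class ImageTracker:
        def __init__(self) -> None:
            # Comment form, as kept in the diff above:
            self.static_images = set()  # type: Set[str]
            # PEP 526 equivalent on Python 3.6+ would be:
            # self.static_images: Set[str] = set()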

View File

@@ -29,29 +29,23 @@ class BaseDocumentationSpider(scrapy.Spider):
     tags = ('a', 'area', 'img')
     attrs = ('href', 'src')

-    def __init__(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         super(BaseDocumentationSpider, self).__init__(*args, **kwargs)
         self.has_error = False

-    def _set_error_state(self):
-        # type: () -> None
+    def _set_error_state(self) -> None:
         self.has_error = True

-    def _has_extension(self, url):
-        # type: (str) -> bool
+    def _has_extension(self, url: str) -> bool:
         return url_has_any_extension(url, self.file_extensions)

-    def _is_external_url(self, url):
-        # type: (str) -> bool
+    def _is_external_url(self, url: str) -> bool:
         return url.startswith('http') or self._has_extension(url)

-    def check_existing(self, response):
-        # type: (Any) -> None
+    def check_existing(self, response: Any) -> None:
         self.log(response)

-    def check_permalink(self, response):
-        # type: (Any) -> None
+    def check_permalink(self, response: Any) -> None:
         self.log(response)
         xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
         m = re.match(r".+\#(?P<permalink>.*)$", response.request.url)  # Get anchor value.
@@ -64,8 +58,7 @@ class BaseDocumentationSpider(scrapy.Spider):
             raise Exception(
                 "Permalink #{} is not found on page {}".format(permalink, response.request.url))

-    def parse(self, response):
-        # type: (Any) -> Generator[Request, None, None]
+    def parse(self, response: Any) -> Generator[Request, None, None]:
         self.log(response)
         for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
                                       tags=self.tags, attrs=self.attrs, deny=self.deny,
@@ -82,20 +75,17 @@ class BaseDocumentationSpider(scrapy.Spider):
             yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
                           errback=self.error_callback)

-    def retry_request_with_get(self, request):
-        # type: (Request) -> Generator[Request, None, None]
+    def retry_request_with_get(self, request: Request) -> Generator[Request, None, None]:
         request.method = 'GET'
         request.dont_filter = True
         yield request

-    def exclude_error(self, url):
-        # type: (str) -> bool
+    def exclude_error(self, url: str) -> bool:
         if url in EXCLUDED_URLS:
             return True
         return False

-    def error_callback(self, failure):
-        # type: (Any) -> Optional[Generator[Any, None, None]]
+    def error_callback(self, failure: Any) -> Optional[Generator[Any, None, None]]:
         if hasattr(failure.value, 'response') and failure.value.response:
             response = failure.value.response
             if self.exclude_error(response.url):
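
The `Generator[Request, None, None]` annotations read as (yield type, send type, return type): these methods yield `Request` objects, accept nothing via `send()`, and return nothing. A toy illustration of the three parameters, unrelated to scrapy:

    from typing import Generator

    def countdown(n: int) -> Generator[int, None, None]:
        # Yields ints; callers never send() values in; no return value.
        while n > 0:
            yield n
            n -= 1

    print(list(countdown(3)))  # [3, 2, 1]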