tools/documentation_crawler: Use python 3 syntax for typing.
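This commit converts comment-style annotations to inline Python 3 annotations across the documentation crawler: each two-line signature (a `def` line followed by a `# type:` comment) collapses into a single annotated `def` line, with no change in runtime behavior. A minimal sketch of the pattern, using a hypothetical `greet` function that is not part of the commit:

from typing import List

# Before: Python 2-compatible comment annotation
# def greet(names, excited):
#     # type: (List[str], bool) -> str

# After: inline Python 3 annotation; mypy treats both forms the same
def greet(names: List[str], excited: bool) -> str:
    suffix = '!' if excited else '.'
    return ', '.join(names) + suffix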
@@ -4,8 +4,7 @@ from typing import List, Any
 
 
 class StatusCommand(Command):
-    def run(self, args, opts):
-        # type: (List[str], Any) -> None
+    def run(self, args: List[str], opts: Any) -> None:
         if len(args) < 1:
             raise UsageError()
         elif len(args) > 1:
@@ -6,8 +6,7 @@ from typing import List
 from .common.spiders import BaseDocumentationSpider
 
 
-def get_start_url():
-    # type: () -> List[str]
+def get_start_url() -> List[str]:
     # Get index html file as start url and convert it to file uri
     dir_path = os.path.dirname(os.path.realpath(__file__))
     start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
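Both spider modules locate files relative to the repository root with `os.path.join(*[os.pardir] * 4)`, which unpacks the repeated list into four parent-directory components. A standalone illustration of the idiom (the hop count of 4 is specific to this package's depth in the tree):

import os

# [os.pardir] * 4 == ['..', '..', '..', '..'], so the join is '../../../..'
relative = os.path.join(*[os.pardir] * 4)
assert relative == os.sep.join([os.pardir] * 4)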
@@ -8,8 +8,7 @@ from .common.spiders import BaseDocumentationSpider
 from typing import Any, List, Set
 
 
-def get_help_images_dir(help_images_path):
-    # type: (str) -> str
+def get_help_images_dir(help_images_path: str) -> str:
     # Get index html file as start url and convert it to file uri
     dir_path = os.path.dirname(os.path.realpath(__file__))
     target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path)
@@ -24,20 +23,17 @@ class HelpDocumentationSpider(BaseDocumentationSpider):
     help_images_path = "static/images/help"
     help_images_static_dir = get_help_images_dir(help_images_path)
 
-    def __init__(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         super(HelpDocumentationSpider, self).__init__(*args, **kwargs)
         self.static_images = set()  # type: Set
 
-    def _is_external_url(self, url):
-        # type: (str) -> bool
+    def _is_external_url(self, url: str) -> bool:
         is_external = url.startswith('http') and 'localhost:9981/help' not in url
         if self._has_extension(url) and 'localhost:9981/static/images/help' in url:
             self.static_images.add(basename(urlparse(url).path))
         return is_external or self._has_extension(url)
 
-    def closed(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def closed(self, *args: Any, **kwargs: Any) -> None:
         unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images
         if unused_images:
             exception_message = "The following images are not used in help documentation " \
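Note that only signatures are rewritten here: the attribute annotation `self.static_images = set()  # type: Set` keeps its comment form, since PEP 526 variable annotations require Python 3.6+. For comparison, a sketch of the newer spelling (not part of this commit, narrowed to `Set[str]` on the assumption that only basenames, which are strings, are added):

from typing import Set

# comment form, as in the diff:
# static_images = set()  # type: Set

# PEP 526 form (Python 3.6+):
static_images: Set[str] = set()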
@@ -29,29 +29,23 @@ class BaseDocumentationSpider(scrapy.Spider):
     tags = ('a', 'area', 'img')
     attrs = ('href', 'src')
 
-    def __init__(self, *args, **kwargs):
-        # type: (*Any, **Any) -> None
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         super(BaseDocumentationSpider, self).__init__(*args, **kwargs)
         self.has_error = False
 
-    def _set_error_state(self):
-        # type: () -> None
+    def _set_error_state(self) -> None:
         self.has_error = True
 
-    def _has_extension(self, url):
-        # type: (str) -> bool
+    def _has_extension(self, url: str) -> bool:
         return url_has_any_extension(url, self.file_extensions)
 
-    def _is_external_url(self, url):
-        # type: (str) -> bool
+    def _is_external_url(self, url: str) -> bool:
         return url.startswith('http') or self._has_extension(url)
 
-    def check_existing(self, response):
-        # type: (Any) -> None
+    def check_existing(self, response: Any) -> None:
         self.log(response)
 
-    def check_permalink(self, response):
-        # type: (Any) -> None
+    def check_permalink(self, response: Any) -> None:
         self.log(response)
         xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
         m = re.match(r".+\#(?P<permalink>.*)$", response.request.url)  # Get anchor value.
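`check_permalink` pulls the anchor out of the requested URL with a named group: `(?P<permalink>...)` captures everything after the final `#`, because the greedy `.+` backtracks only as far as the last match. A standalone check of that regex against a URL shaped like the ones this spider crawls:

import re

m = re.match(r".+\#(?P<permalink>.*)$",
             "http://localhost:9981/help/#edit-a-message")
assert m is not None and m.group('permalink') == 'edit-a-message'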
@@ -64,8 +58,7 @@ class BaseDocumentationSpider(scrapy.Spider):
             raise Exception(
                 "Permalink #{} is not found on page {}".format(permalink, response.request.url))
 
-    def parse(self, response):
-        # type: (Any) -> Generator[Request, None, None]
+    def parse(self, response: Any) -> Generator[Request, None, None]:
         self.log(response)
         for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
                                       tags=self.tags, attrs=self.attrs, deny=self.deny,
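The three parameters of `Generator[Request, None, None]` are the yield type, the send type, and the return type: `parse` yields `Request` objects and neither accepts values via `send()` nor returns one. The same shape on plain ints:

from typing import Generator

def count_up(limit: int) -> Generator[int, None, None]:
    # yields ints; send and return types are both None
    n = 0
    while n < limit:
        yield n
        n += 1

assert list(count_up(3)) == [0, 1, 2]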
@@ -82,20 +75,17 @@ class BaseDocumentationSpider(scrapy.Spider):
             yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
                           errback=self.error_callback)
 
-    def retry_request_with_get(self, request):
-        # type: (Request) -> Generator[Request, None, None]
+    def retry_request_with_get(self, request: Request) -> Generator[Request, None, None]:
         request.method = 'GET'
         request.dont_filter = True
         yield request
 
-    def exclude_error(self, url):
-        # type: (str) -> bool
+    def exclude_error(self, url: str) -> bool:
         if url in EXCLUDED_URLS:
             return True
         return False
 
-    def error_callback(self, failure):
-        # type: (Any) -> Optional[Generator[Any, None, None]]
+    def error_callback(self, failure: Any) -> Optional[Generator[Any, None, None]]:
         if hasattr(failure.value, 'response') and failure.value.response:
             response = failure.value.response
             if self.exclude_error(response.url):
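One practical gain from the conversion: inline annotations are visible at runtime via `typing.get_type_hints`, whereas `# type:` comments are invisible to the interpreter. A quick check, with a stub body standing in for the real `get_start_url`:

from typing import List, get_type_hints

def get_start_url() -> List[str]:
    return []  # stub; the real function builds a file: URI list

# a comment-annotated function would have empty type hints here
assert get_type_hints(get_start_url) == {'return': List[str]}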