mirror of
https://github.com/zulip/zulip.git
synced 2025-11-04 14:03:30 +00:00
tools/documentation_crawler: Use python 3 syntax for typing.
This commit is contained in:
@@ -4,8 +4,7 @@ from typing import List, Any
|
|||||||
|
|
||||||
|
|
||||||
class StatusCommand(Command):
|
class StatusCommand(Command):
|
||||||
def run(self, args, opts):
|
def run(self, args: List[str], opts: Any) -> None:
|
||||||
# type: (List[str], Any) -> None
|
|
||||||
if len(args) < 1:
|
if len(args) < 1:
|
||||||
raise UsageError()
|
raise UsageError()
|
||||||
elif len(args) > 1:
|
elif len(args) > 1:
|
||||||
|
|||||||
@@ -6,8 +6,7 @@ from typing import List
|
|||||||
from .common.spiders import BaseDocumentationSpider
|
from .common.spiders import BaseDocumentationSpider
|
||||||
|
|
||||||
|
|
||||||
def get_start_url():
|
def get_start_url() -> List[str]:
|
||||||
# type: () -> List[str]
|
|
||||||
# Get index html file as start url and convert it to file uri
|
# Get index html file as start url and convert it to file uri
|
||||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||||
start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
|
start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
|
||||||
|
|||||||
@@ -8,8 +8,7 @@ from .common.spiders import BaseDocumentationSpider
|
|||||||
from typing import Any, List, Set
|
from typing import Any, List, Set
|
||||||
|
|
||||||
|
|
||||||
def get_help_images_dir(help_images_path):
|
def get_help_images_dir(help_images_path: str) -> str:
|
||||||
# type: (str) -> str
|
|
||||||
# Get index html file as start url and convert it to file uri
|
# Get index html file as start url and convert it to file uri
|
||||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||||
target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path)
|
target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path)
|
||||||
@@ -24,20 +23,17 @@ class HelpDocumentationSpider(BaseDocumentationSpider):
|
|||||||
help_images_path = "static/images/help"
|
help_images_path = "static/images/help"
|
||||||
help_images_static_dir = get_help_images_dir(help_images_path)
|
help_images_static_dir = get_help_images_dir(help_images_path)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
||||||
# type: (*Any, **Any) -> None
|
|
||||||
super(HelpDocumentationSpider, self).__init__(*args, **kwargs)
|
super(HelpDocumentationSpider, self).__init__(*args, **kwargs)
|
||||||
self.static_images = set() # type: Set
|
self.static_images = set() # type: Set
|
||||||
|
|
||||||
def _is_external_url(self, url):
|
def _is_external_url(self, url: str) -> bool:
|
||||||
# type: (str) -> bool
|
|
||||||
is_external = url.startswith('http') and 'localhost:9981/help' not in url
|
is_external = url.startswith('http') and 'localhost:9981/help' not in url
|
||||||
if self._has_extension(url) and 'localhost:9981/static/images/help' in url:
|
if self._has_extension(url) and 'localhost:9981/static/images/help' in url:
|
||||||
self.static_images.add(basename(urlparse(url).path))
|
self.static_images.add(basename(urlparse(url).path))
|
||||||
return is_external or self._has_extension(url)
|
return is_external or self._has_extension(url)
|
||||||
|
|
||||||
def closed(self, *args, **kwargs):
|
def closed(self, *args: Any, **kwargs: Any) -> None:
|
||||||
# type: (*Any, **Any) -> None
|
|
||||||
unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images
|
unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images
|
||||||
if unused_images:
|
if unused_images:
|
||||||
exception_message = "The following images are not used in help documentation " \
|
exception_message = "The following images are not used in help documentation " \
|
||||||
|
|||||||
@@ -29,29 +29,23 @@ class BaseDocumentationSpider(scrapy.Spider):
|
|||||||
tags = ('a', 'area', 'img')
|
tags = ('a', 'area', 'img')
|
||||||
attrs = ('href', 'src')
|
attrs = ('href', 'src')
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
||||||
# type: (*Any, **Any) -> None
|
|
||||||
super(BaseDocumentationSpider, self).__init__(*args, **kwargs)
|
super(BaseDocumentationSpider, self).__init__(*args, **kwargs)
|
||||||
self.has_error = False
|
self.has_error = False
|
||||||
|
|
||||||
def _set_error_state(self):
|
def _set_error_state(self) -> None:
|
||||||
# type: () -> None
|
|
||||||
self.has_error = True
|
self.has_error = True
|
||||||
|
|
||||||
def _has_extension(self, url):
|
def _has_extension(self, url: str) -> bool:
|
||||||
# type: (str) -> bool
|
|
||||||
return url_has_any_extension(url, self.file_extensions)
|
return url_has_any_extension(url, self.file_extensions)
|
||||||
|
|
||||||
def _is_external_url(self, url):
|
def _is_external_url(self, url: str) -> bool:
|
||||||
# type: (str) -> bool
|
|
||||||
return url.startswith('http') or self._has_extension(url)
|
return url.startswith('http') or self._has_extension(url)
|
||||||
|
|
||||||
def check_existing(self, response):
|
def check_existing(self, response: Any) -> None:
|
||||||
# type: (Any) -> None
|
|
||||||
self.log(response)
|
self.log(response)
|
||||||
|
|
||||||
def check_permalink(self, response):
|
def check_permalink(self, response: Any) -> None:
|
||||||
# type: (Any) -> None
|
|
||||||
self.log(response)
|
self.log(response)
|
||||||
xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
|
xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
|
||||||
m = re.match(r".+\#(?P<permalink>.*)$", response.request.url) # Get anchor value.
|
m = re.match(r".+\#(?P<permalink>.*)$", response.request.url) # Get anchor value.
|
||||||
@@ -64,8 +58,7 @@ class BaseDocumentationSpider(scrapy.Spider):
|
|||||||
raise Exception(
|
raise Exception(
|
||||||
"Permalink #{} is not found on page {}".format(permalink, response.request.url))
|
"Permalink #{} is not found on page {}".format(permalink, response.request.url))
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response: Any) -> Generator[Request, None, None]:
|
||||||
# type: (Any) -> Generator[Request, None, None]
|
|
||||||
self.log(response)
|
self.log(response)
|
||||||
for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
|
for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
|
||||||
tags=self.tags, attrs=self.attrs, deny=self.deny,
|
tags=self.tags, attrs=self.attrs, deny=self.deny,
|
||||||
@@ -82,20 +75,17 @@ class BaseDocumentationSpider(scrapy.Spider):
|
|||||||
yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
|
yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
|
||||||
errback=self.error_callback)
|
errback=self.error_callback)
|
||||||
|
|
||||||
def retry_request_with_get(self, request):
|
def retry_request_with_get(self, request: Request) -> Generator[Request, None, None]:
|
||||||
# type: (Request) -> Generator[Request, None, None]
|
|
||||||
request.method = 'GET'
|
request.method = 'GET'
|
||||||
request.dont_filter = True
|
request.dont_filter = True
|
||||||
yield request
|
yield request
|
||||||
|
|
||||||
def exclude_error(self, url):
|
def exclude_error(self, url: str) -> bool:
|
||||||
# type: (str) -> bool
|
|
||||||
if url in EXCLUDED_URLS:
|
if url in EXCLUDED_URLS:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def error_callback(self, failure):
|
def error_callback(self, failure: Any) -> Optional[Generator[Any, None, None]]:
|
||||||
# type: (Any) -> Optional[Generator[Any, None, None]]
|
|
||||||
if hasattr(failure.value, 'response') and failure.value.response:
|
if hasattr(failure.value, 'response') and failure.value.response:
|
||||||
response = failure.value.response
|
response = failure.value.response
|
||||||
if self.exclude_error(response.url):
|
if self.exclude_error(response.url):
|
||||||
|
|||||||
Reference in New Issue
Block a user