mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	tools/documentation_crawler: Use python 3 syntax for typing.
This commit is contained in:
		@@ -4,8 +4,7 @@ from typing import List, Any
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class StatusCommand(Command):
 | 
					class StatusCommand(Command):
 | 
				
			||||||
    def run(self, args, opts):
 | 
					    def run(self, args: List[str], opts: Any) -> None:
 | 
				
			||||||
        # type: (List[str], Any) -> None
 | 
					 | 
				
			||||||
        if len(args) < 1:
 | 
					        if len(args) < 1:
 | 
				
			||||||
            raise UsageError()
 | 
					            raise UsageError()
 | 
				
			||||||
        elif len(args) > 1:
 | 
					        elif len(args) > 1:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,8 +6,7 @@ from typing import List
 | 
				
			|||||||
from .common.spiders import BaseDocumentationSpider
 | 
					from .common.spiders import BaseDocumentationSpider
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_start_url():
 | 
					def get_start_url() -> List[str]:
 | 
				
			||||||
    # type: () -> List[str]
 | 
					 | 
				
			||||||
    # Get index html file as start url and convert it to file uri
 | 
					    # Get index html file as start url and convert it to file uri
 | 
				
			||||||
    dir_path = os.path.dirname(os.path.realpath(__file__))
 | 
					    dir_path = os.path.dirname(os.path.realpath(__file__))
 | 
				
			||||||
    start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
 | 
					    start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,8 +8,7 @@ from .common.spiders import BaseDocumentationSpider
 | 
				
			|||||||
from typing import Any, List, Set
 | 
					from typing import Any, List, Set
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_help_images_dir(help_images_path):
 | 
					def get_help_images_dir(help_images_path: str) -> str:
 | 
				
			||||||
    # type: (str) -> str
 | 
					 | 
				
			||||||
    # Get index html file as start url and convert it to file uri
 | 
					    # Get index html file as start url and convert it to file uri
 | 
				
			||||||
    dir_path = os.path.dirname(os.path.realpath(__file__))
 | 
					    dir_path = os.path.dirname(os.path.realpath(__file__))
 | 
				
			||||||
    target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path)
 | 
					    target_path = os.path.join(dir_path, os.path.join(*[os.pardir] * 4), help_images_path)
 | 
				
			||||||
@@ -24,20 +23,17 @@ class HelpDocumentationSpider(BaseDocumentationSpider):
 | 
				
			|||||||
    help_images_path = "static/images/help"
 | 
					    help_images_path = "static/images/help"
 | 
				
			||||||
    help_images_static_dir = get_help_images_dir(help_images_path)
 | 
					    help_images_static_dir = get_help_images_dir(help_images_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, *args, **kwargs):
 | 
					    def __init__(self, *args: Any, **kwargs: Any) -> None:
 | 
				
			||||||
        # type: (*Any, **Any) -> None
 | 
					 | 
				
			||||||
        super(HelpDocumentationSpider, self).__init__(*args, **kwargs)
 | 
					        super(HelpDocumentationSpider, self).__init__(*args, **kwargs)
 | 
				
			||||||
        self.static_images = set()  # type: Set
 | 
					        self.static_images = set()  # type: Set
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _is_external_url(self, url):
 | 
					    def _is_external_url(self, url: str) -> bool:
 | 
				
			||||||
        # type: (str) -> bool
 | 
					 | 
				
			||||||
        is_external = url.startswith('http') and 'localhost:9981/help' not in url
 | 
					        is_external = url.startswith('http') and 'localhost:9981/help' not in url
 | 
				
			||||||
        if self._has_extension(url) and 'localhost:9981/static/images/help' in url:
 | 
					        if self._has_extension(url) and 'localhost:9981/static/images/help' in url:
 | 
				
			||||||
            self.static_images.add(basename(urlparse(url).path))
 | 
					            self.static_images.add(basename(urlparse(url).path))
 | 
				
			||||||
        return is_external or self._has_extension(url)
 | 
					        return is_external or self._has_extension(url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def closed(self, *args, **kwargs):
 | 
					    def closed(self, *args: Any, **kwargs: Any) -> None:
 | 
				
			||||||
        # type: (*Any, **Any) -> None
 | 
					 | 
				
			||||||
        unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images
 | 
					        unused_images = set(os.listdir(self.help_images_static_dir)) - self.static_images
 | 
				
			||||||
        if unused_images:
 | 
					        if unused_images:
 | 
				
			||||||
            exception_message = "The following images are not used in help documentation " \
 | 
					            exception_message = "The following images are not used in help documentation " \
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,29 +29,23 @@ class BaseDocumentationSpider(scrapy.Spider):
 | 
				
			|||||||
    tags = ('a', 'area', 'img')
 | 
					    tags = ('a', 'area', 'img')
 | 
				
			||||||
    attrs = ('href', 'src')
 | 
					    attrs = ('href', 'src')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, *args, **kwargs):
 | 
					    def __init__(self, *args: Any, **kwargs: Any) -> None:
 | 
				
			||||||
        # type: (*Any, **Any) -> None
 | 
					 | 
				
			||||||
        super(BaseDocumentationSpider, self).__init__(*args, **kwargs)
 | 
					        super(BaseDocumentationSpider, self).__init__(*args, **kwargs)
 | 
				
			||||||
        self.has_error = False
 | 
					        self.has_error = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _set_error_state(self):
 | 
					    def _set_error_state(self) -> None:
 | 
				
			||||||
        # type: () -> None
 | 
					 | 
				
			||||||
        self.has_error = True
 | 
					        self.has_error = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _has_extension(self, url):
 | 
					    def _has_extension(self, url: str) -> bool:
 | 
				
			||||||
        # type: (str) -> bool
 | 
					 | 
				
			||||||
        return url_has_any_extension(url, self.file_extensions)
 | 
					        return url_has_any_extension(url, self.file_extensions)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _is_external_url(self, url):
 | 
					    def _is_external_url(self, url: str) -> bool:
 | 
				
			||||||
        # type: (str) -> bool
 | 
					 | 
				
			||||||
        return url.startswith('http') or self._has_extension(url)
 | 
					        return url.startswith('http') or self._has_extension(url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def check_existing(self, response):
 | 
					    def check_existing(self, response: Any) -> None:
 | 
				
			||||||
        # type: (Any) -> None
 | 
					 | 
				
			||||||
        self.log(response)
 | 
					        self.log(response)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def check_permalink(self, response):
 | 
					    def check_permalink(self, response: Any) -> None:
 | 
				
			||||||
        # type: (Any) -> None
 | 
					 | 
				
			||||||
        self.log(response)
 | 
					        self.log(response)
 | 
				
			||||||
        xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
 | 
					        xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
 | 
				
			||||||
        m = re.match(r".+\#(?P<permalink>.*)$", response.request.url)  # Get anchor value.
 | 
					        m = re.match(r".+\#(?P<permalink>.*)$", response.request.url)  # Get anchor value.
 | 
				
			||||||
@@ -64,8 +58,7 @@ class BaseDocumentationSpider(scrapy.Spider):
 | 
				
			|||||||
            raise Exception(
 | 
					            raise Exception(
 | 
				
			||||||
                "Permalink #{} is not found on page {}".format(permalink, response.request.url))
 | 
					                "Permalink #{} is not found on page {}".format(permalink, response.request.url))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def parse(self, response):
 | 
					    def parse(self, response: Any) -> Generator[Request, None, None]:
 | 
				
			||||||
        # type: (Any) -> Generator[Request, None, None]
 | 
					 | 
				
			||||||
        self.log(response)
 | 
					        self.log(response)
 | 
				
			||||||
        for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
 | 
					        for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
 | 
				
			||||||
                                      tags=self.tags, attrs=self.attrs, deny=self.deny,
 | 
					                                      tags=self.tags, attrs=self.attrs, deny=self.deny,
 | 
				
			||||||
@@ -82,20 +75,17 @@ class BaseDocumentationSpider(scrapy.Spider):
 | 
				
			|||||||
            yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
 | 
					            yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
 | 
				
			||||||
                          errback=self.error_callback)
 | 
					                          errback=self.error_callback)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def retry_request_with_get(self, request):
 | 
					    def retry_request_with_get(self, request: Request) -> Generator[Request, None, None]:
 | 
				
			||||||
        # type: (Request) -> Generator[Request, None, None]
 | 
					 | 
				
			||||||
        request.method = 'GET'
 | 
					        request.method = 'GET'
 | 
				
			||||||
        request.dont_filter = True
 | 
					        request.dont_filter = True
 | 
				
			||||||
        yield request
 | 
					        yield request
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def exclude_error(self, url):
 | 
					    def exclude_error(self, url: str) -> bool:
 | 
				
			||||||
        # type: (str) -> bool
 | 
					 | 
				
			||||||
        if url in EXCLUDED_URLS:
 | 
					        if url in EXCLUDED_URLS:
 | 
				
			||||||
            return True
 | 
					            return True
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def error_callback(self, failure):
 | 
					    def error_callback(self, failure: Any) -> Optional[Generator[Any, None, None]]:
 | 
				
			||||||
        # type: (Any) -> Optional[Generator[Any, None, None]]
 | 
					 | 
				
			||||||
        if hasattr(failure.value, 'response') and failure.value.response:
 | 
					        if hasattr(failure.value, 'response') and failure.value.response:
 | 
				
			||||||
            response = failure.value.response
 | 
					            response = failure.value.response
 | 
				
			||||||
            if self.exclude_error(response.url):
 | 
					            if self.exclude_error(response.url):
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user