Mirror of https://github.com/zulip/zulip.git, synced 2025-11-07 15:33:30 +00:00
python: Reformat with Black, except quotes.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
Committed by: Tim Abbott
Parent: 5028c081cb
Commit: 11741543da
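The commit title describes running Black with string normalization disabled, so the formatter reflows code but leaves the existing single-quoted strings untouched. Below is a minimal sketch of that behaviour through Black's Python API; the exact options Zulip's tooling passes (line length, target versions, and so on) are an assumption here, not something taken from this commit.

# Sketch only: demonstrates "reformat, except quotes" on one line from the diff below.
# Assumes the `black` package is installed; Mode options other than
# string_normalization are left at their defaults, which may differ from
# Zulip's actual configuration.
import black

source = (
    "USER_AGENT = ('Mozilla/5.0 (X11; Linux x86_64) '\n"
    "              'AppleWebKit/537.36 (KHTML, like Gecko) '\n"
    "              'Chrome/54.0.2840.59 Safari/537.36')\n"
)

# string_normalization=False keeps the original quote characters instead of
# rewriting them to double quotes.
formatted = black.format_str(source, mode=black.Mode(string_normalization=False))
print(formatted)

With these options Black splits the long assignment across parenthesized lines, much as the USER_AGENT change in the first hunk below shows, while the single quotes are preserved.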
@@ -17,77 +17,79 @@ DOWNLOAD_TIMEOUT = 15
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
-USER_AGENT = ('Mozilla/5.0 (X11; Linux x86_64) '
-              'AppleWebKit/537.36 (KHTML, like Gecko) '
-              'Chrome/54.0.2840.59 Safari/537.36')
+USER_AGENT = (
+    'Mozilla/5.0 (X11; Linux x86_64) '
+    'AppleWebKit/537.36 (KHTML, like Gecko) '
+    'Chrome/54.0.2840.59 Safari/537.36'
+)

 # Obey robots.txt rules
 ROBOTSTXT_OBEY = False

 # Configure maximum concurrent requests performed by Scrapy (default: 16)
-#CONCURRENT_REQUESTS = 32
+# CONCURRENT_REQUESTS = 32

 # Configure a delay for requests for the same website (default: 0)
 # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
 # See also autothrottle settings and docs
-#DOWNLOAD_DELAY = 3
+# DOWNLOAD_DELAY = 3
 # The download delay setting will honor only one of:
-#CONCURRENT_REQUESTS_PER_DOMAIN = 16
-#CONCURRENT_REQUESTS_PER_IP = 16
+# CONCURRENT_REQUESTS_PER_DOMAIN = 16
+# CONCURRENT_REQUESTS_PER_IP = 16

 # Disable cookies (enabled by default)
-#COOKIES_ENABLED = False
+# COOKIES_ENABLED = False

 # Disable Telnet Console (enabled by default)
-#TELNETCONSOLE_ENABLED = False
+# TELNETCONSOLE_ENABLED = False

 # Override the default request headers:
-#DEFAULT_REQUEST_HEADERS = {
+# DEFAULT_REQUEST_HEADERS = {
 # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 # 'Accept-Language': 'en',
-#}
+# }

 # Enable or disable spider middlewares
 # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
-#SPIDER_MIDDLEWARES = {
+# SPIDER_MIDDLEWARES = {
 # 'documentation_crawler.middlewares.MyCustomSpiderMiddleware': 543,
-#}
+# }

 # Enable or disable downloader middlewares
 # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
-#DOWNLOADER_MIDDLEWARES = {
+# DOWNLOADER_MIDDLEWARES = {
 # 'documentation_crawler.middlewares.MyCustomDownloaderMiddleware': 543,
-#}
+# }

 # Enable or disable extensions
 # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
-#EXTENSIONS = {
+# EXTENSIONS = {
 # 'scrapy.extensions.telnet.TelnetConsole': None,
-#}
+# }

 # Configure item pipelines
 # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
-#ITEM_PIPELINES = {
+# ITEM_PIPELINES = {
 # 'documentation_crawler.pipelines.SomePipeline': 300,
-#}
+# }

 # Enable and configure the AutoThrottle extension (disabled by default)
 # See http://doc.scrapy.org/en/latest/topics/autothrottle.html
-#AUTOTHROTTLE_ENABLED = True
+# AUTOTHROTTLE_ENABLED = True
 # The initial download delay
-#AUTOTHROTTLE_START_DELAY = 5
+# AUTOTHROTTLE_START_DELAY = 5
 # The maximum download delay to be set in case of high latencies
-#AUTOTHROTTLE_MAX_DELAY = 60
+# AUTOTHROTTLE_MAX_DELAY = 60
 # The average number of requests Scrapy should be sending in parallel to
 # each remote server
-#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
 # Enable showing throttling stats for every response received:
-#AUTOTHROTTLE_DEBUG = False
+# AUTOTHROTTLE_DEBUG = False

 # Enable and configure HTTP caching (disabled by default)
 # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-#HTTPCACHE_ENABLED = True
-#HTTPCACHE_EXPIRATION_SECS = 0
-#HTTPCACHE_DIR = 'httpcache'
-#HTTPCACHE_IGNORE_HTTP_CODES = []
-#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+# HTTPCACHE_ENABLED = True
+# HTTPCACHE_EXPIRATION_SECS = 0
+# HTTPCACHE_DIR = 'httpcache'
+# HTTPCACHE_IGNORE_HTTP_CODES = []
+# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
@@ -8,8 +8,9 @@ from .common.spiders import BaseDocumentationSpider
 def get_start_url() -> List[str]:
     # Get index.html file as start URL and convert it to file URI
     dir_path = os.path.dirname(os.path.realpath(__file__))
-    start_file = os.path.join(dir_path, os.path.join(*[os.pardir] * 4),
-                              "docs/_build/html/index.html")
+    start_file = os.path.join(
+        dir_path, os.path.join(*[os.pardir] * 4), "docs/_build/html/index.html"
+    )
     return [
         pathlib.Path(os.path.abspath(start_file)).as_uri(),
     ]
@@ -30,10 +30,13 @@ class UnusedImagesLinterSpider(BaseDocumentationSpider):
     def closed(self, *args: Any, **kwargs: Any) -> None:
         unused_images = set(os.listdir(self.images_static_dir)) - self.static_images
         if unused_images:
-            exception_message = "The following images are not used in documentation and can be removed: {}"
+            exception_message = (
+                "The following images are not used in documentation and can be removed: {}"
+            )
             self._set_error_state()
             unused_images_relatedpath = [
-                os.path.join(self.images_path, img) for img in unused_images]
+                os.path.join(self.images_path, img) for img in unused_images
+            ]
             raise Exception(exception_message.format(', '.join(unused_images_relatedpath)))
@@ -51,6 +54,7 @@ class APIDocumentationSpider(UnusedImagesLinterSpider):
     deny_domains: List[str] = []
     images_path = "static/images/api"

+
 class PorticoDocumentationSpider(BaseDocumentationSpider):
     def _is_external_url(self, url: str) -> bool:
         return (
@@ -61,19 +65,21 @@ class PorticoDocumentationSpider(BaseDocumentationSpider):
         )

     name = 'portico_documentation_crawler'
-    start_urls = ['http://localhost:9981/hello',
-                  'http://localhost:9981/history',
-                  'http://localhost:9981/plans',
-                  'http://localhost:9981/team',
-                  'http://localhost:9981/apps',
-                  'http://localhost:9981/integrations',
-                  'http://localhost:9981/terms',
-                  'http://localhost:9981/privacy',
-                  'http://localhost:9981/features',
-                  'http://localhost:9981/why-zulip',
-                  'http://localhost:9981/for/open-source',
-                  'http://localhost:9981/for/companies',
-                  'http://localhost:9981/for/working-groups-and-communities',
-                  'http://localhost:9981/for/research',
-                  'http://localhost:9981/security']
+    start_urls = [
+        'http://localhost:9981/hello',
+        'http://localhost:9981/history',
+        'http://localhost:9981/plans',
+        'http://localhost:9981/team',
+        'http://localhost:9981/apps',
+        'http://localhost:9981/integrations',
+        'http://localhost:9981/terms',
+        'http://localhost:9981/privacy',
+        'http://localhost:9981/features',
+        'http://localhost:9981/why-zulip',
+        'http://localhost:9981/for/open-source',
+        'http://localhost:9981/for/companies',
+        'http://localhost:9981/for/working-groups-and-communities',
+        'http://localhost:9981/for/research',
+        'http://localhost:9981/security',
+    ]
     deny_domains: List[str] = []
@@ -37,7 +37,6 @@ VNU_IGNORE = [
     r'No “p” element in scope but a “p” end tag seen\.',
     r'Element “div” not allowed as child of element “ul” in this context\. '
     + r'\(Suppressing further errors from this subtree\.\)',
-
     # Warnings that are probably less important.
     r'The “type” attribute is unnecessary for JavaScript resources\.',
 ]
@@ -48,6 +47,7 @@ DEPLOY_ROOT = os.path.abspath(os.path.join(__file__, "../../../../../.."))
 ZULIP_SERVER_GITHUB_FILE_URL_PREFIX = "https://github.com/zulip/zulip/blob/master"
 ZULIP_SERVER_GITHUB_DIRECTORY_URL_PREFIX = "https://github.com/zulip/zulip/tree/master"

+
 class BaseDocumentationSpider(scrapy.Spider):
     name: Optional[str] = None
     # Exclude domain address.
@@ -80,7 +80,9 @@ class BaseDocumentationSpider(scrapy.Spider):
         if (len(url) > 4 and url[:4] == "file") or ("localhost" in url):
             # We also want CI to check any links to built documentation.
             return False
-        if url.startswith(ZULIP_SERVER_GITHUB_FILE_URL_PREFIX) or url.startswith(ZULIP_SERVER_GITHUB_DIRECTORY_URL_PREFIX):
+        if url.startswith(ZULIP_SERVER_GITHUB_FILE_URL_PREFIX) or url.startswith(
+            ZULIP_SERVER_GITHUB_DIRECTORY_URL_PREFIX
+        ):
             # We can verify these links directly in the local git repo without making any requests to GitHub servers.
             return False
         if 'github.com/zulip' in url:
@@ -100,7 +102,8 @@ class BaseDocumentationSpider(scrapy.Spider):
         # Check fragment existing on response page.
         if not response.selector.xpath(xpath_template.format(fragment=fragment)):
             self.logger.error(
-                "Fragment #%s is not found on page %s", fragment, response.request.url)
+                "Fragment #%s is not found on page %s", fragment, response.request.url
+            )

     def _vnu_callback(self, url: str) -> Callable[[Response], None]:
         def callback(response: Response) -> None:
@@ -125,7 +128,11 @@ class BaseDocumentationSpider(scrapy.Spider):
         # can be accessible without login an account. While we do
         # crawl documentation served by the webapp (E.g. /help/), we
         # don't want to crawl the webapp itself, so we exclude these.
-        if url in ['http://localhost:9981/', 'http://localhost:9981'] or url.startswith('http://localhost:9981/#') or url.startswith('http://localhost:9981#'):
+        if (
+            url in ['http://localhost:9981/', 'http://localhost:9981']
+            or url.startswith('http://localhost:9981/#')
+            or url.startswith('http://localhost:9981#')
+        ):
             return

         callback: Callable[[Response], Optional[Iterator[Request]]] = self.parse
@@ -141,20 +148,29 @@ class BaseDocumentationSpider(scrapy.Spider):
             if hash_index != -1:
                 file_path = file_path[:hash_index]
             if not os.path.isfile(file_path):
-                self.logger.error("There is no local file associated with the GitHub URL: %s", url)
+                self.logger.error(
+                    "There is no local file associated with the GitHub URL: %s", url
+                )
                 return
         elif url.startswith(ZULIP_SERVER_GITHUB_DIRECTORY_URL_PREFIX):
             dir_path = url.replace(ZULIP_SERVER_GITHUB_DIRECTORY_URL_PREFIX, DEPLOY_ROOT)
             if not os.path.isdir(dir_path):
-                self.logger.error("There is no local directory associated with the GitHub URL: %s", url)
+                self.logger.error(
+                    "There is no local directory associated with the GitHub URL: %s", url
+                )
                 return
         elif '#' in url:
             dont_filter = True
             callback = self.check_fragment
         if getattr(self, 'skip_external', False) and self._is_external_link(url):
             return
-        yield Request(url, method=method, callback=callback, dont_filter=dont_filter,
-                      errback=self.error_callback)
+        yield Request(
+            url,
+            method=method,
+            callback=callback,
+            dont_filter=dont_filter,
+            errback=self.error_callback,
+        )

     def start_requests(self) -> Iterator[Request]:
         for url in self.start_urls:
@@ -173,9 +189,14 @@ class BaseDocumentationSpider(scrapy.Spider):
                 errback=self.error_callback,
             )

-        for link in LxmlLinkExtractor(deny_domains=self.deny_domains, deny_extensions=['doc'],
-                                      tags=self.tags, attrs=self.attrs, deny=self.deny,
-                                      canonicalize=False).extract_links(response):
+        for link in LxmlLinkExtractor(
+            deny_domains=self.deny_domains,
+            deny_extensions=['doc'],
+            tags=self.tags,
+            attrs=self.attrs,
+            deny=self.deny,
+            canonicalize=False,
+        ).extract_links(response):
             yield from self._make_requests(link.url)

     def retry_request_with_get(self, request: Request) -> Iterator[Request]: