python: Normalize quotes with Black.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
commit 6e4c3e41dc
parent 11741543da
committed by Tim Abbott
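For context: Black normalizes string quotes by default (this can be disabled with its --skip-string-normalization flag), rewriting single-quoted literals to double quotes wherever that adds no escaping. That purely mechanical rewrite is all this diff contains. A minimal before/after sketch of the effect, using two lines from the diff itself:

# Before running Black: single-quoted literals.
BOT_NAME = 'documentation_crawler'
LOG_LEVEL = 'WARNING'

# After `black .`: the same literals, normalized to double quotes.
BOT_NAME = "documentation_crawler"
LOG_LEVEL = "WARNING"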
@@ -7,20 +7,20 @@
 # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
 # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
 
-BOT_NAME = 'documentation_crawler'
+BOT_NAME = "documentation_crawler"
 
-SPIDER_MODULES = ['documentation_crawler.spiders']
-NEWSPIDER_MODULE = 'documentation_crawler.spiders'
-COMMANDS_MODULE = 'documentation_crawler.commands'
-LOG_LEVEL = 'WARNING'
+SPIDER_MODULES = ["documentation_crawler.spiders"]
+NEWSPIDER_MODULE = "documentation_crawler.spiders"
+COMMANDS_MODULE = "documentation_crawler.commands"
+LOG_LEVEL = "WARNING"
 DOWNLOAD_TIMEOUT = 15
 
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
 USER_AGENT = (
-    'Mozilla/5.0 (X11; Linux x86_64) '
-    'AppleWebKit/537.36 (KHTML, like Gecko) '
-    'Chrome/54.0.2840.59 Safari/537.36'
+    "Mozilla/5.0 (X11; Linux x86_64) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) "
+    "Chrome/54.0.2840.59 Safari/537.36"
 )
 
 # Obey robots.txt rules
@@ -18,6 +18,6 @@ def get_start_url() -> List[str]:
 
 class DocumentationSpider(BaseDocumentationSpider):
     name = "documentation_crawler"
-    deny_domains = ['localhost:9991']
-    deny = [r'\_sources\/.*\.txt']
+    deny_domains = ["localhost:9991"]
+    deny = [r"\_sources\/.*\.txt"]
     start_urls = get_start_url()
@@ -22,8 +22,8 @@ class UnusedImagesLinterSpider(BaseDocumentationSpider):
         self.images_static_dir: str = get_images_dir(self.images_path)
 
     def _is_external_url(self, url: str) -> bool:
-        is_external = url.startswith('http') and self.start_urls[0] not in url
-        if self._has_extension(url) and f'localhost:9981/{self.images_path}' in url:
+        is_external = url.startswith("http") and self.start_urls[0] not in url
+        if self._has_extension(url) and f"localhost:9981/{self.images_path}" in url:
             self.static_images.add(basename(urlparse(url).path))
         return is_external or self._has_extension(url)
 
@@ -37,20 +37,20 @@ class UnusedImagesLinterSpider(BaseDocumentationSpider):
         unused_images_relatedpath = [
             os.path.join(self.images_path, img) for img in unused_images
         ]
-        raise Exception(exception_message.format(', '.join(unused_images_relatedpath)))
+        raise Exception(exception_message.format(", ".join(unused_images_relatedpath)))
 
 
 class HelpDocumentationSpider(UnusedImagesLinterSpider):
     name = "help_documentation_crawler"
-    start_urls = ['http://localhost:9981/help']
+    start_urls = ["http://localhost:9981/help"]
     deny_domains: List[str] = []
-    deny = ['/privacy']
+    deny = ["/privacy"]
     images_path = "static/images/help"
 
 
 class APIDocumentationSpider(UnusedImagesLinterSpider):
-    name = 'api_documentation_crawler'
-    start_urls = ['http://localhost:9981/api']
+    name = "api_documentation_crawler"
+    start_urls = ["http://localhost:9981/api"]
     deny_domains: List[str] = []
     images_path = "static/images/api"
 
@@ -58,28 +58,28 @@ class APIDocumentationSpider(UnusedImagesLinterSpider):
 class PorticoDocumentationSpider(BaseDocumentationSpider):
     def _is_external_url(self, url: str) -> bool:
         return (
-            not url.startswith('http://localhost:9981')
-            or url.startswith('http://localhost:9981/help')
-            or url.startswith('http://localhost:9981/api')
+            not url.startswith("http://localhost:9981")
+            or url.startswith("http://localhost:9981/help")
+            or url.startswith("http://localhost:9981/api")
             or self._has_extension(url)
         )
 
-    name = 'portico_documentation_crawler'
+    name = "portico_documentation_crawler"
     start_urls = [
-        'http://localhost:9981/hello',
-        'http://localhost:9981/history',
-        'http://localhost:9981/plans',
-        'http://localhost:9981/team',
-        'http://localhost:9981/apps',
-        'http://localhost:9981/integrations',
-        'http://localhost:9981/terms',
-        'http://localhost:9981/privacy',
-        'http://localhost:9981/features',
-        'http://localhost:9981/why-zulip',
-        'http://localhost:9981/for/open-source',
-        'http://localhost:9981/for/companies',
-        'http://localhost:9981/for/working-groups-and-communities',
-        'http://localhost:9981/for/research',
-        'http://localhost:9981/security',
+        "http://localhost:9981/hello",
+        "http://localhost:9981/history",
+        "http://localhost:9981/plans",
+        "http://localhost:9981/team",
+        "http://localhost:9981/apps",
+        "http://localhost:9981/integrations",
+        "http://localhost:9981/terms",
+        "http://localhost:9981/privacy",
+        "http://localhost:9981/features",
+        "http://localhost:9981/why-zulip",
+        "http://localhost:9981/for/open-source",
+        "http://localhost:9981/for/companies",
+        "http://localhost:9981/for/working-groups-and-communities",
+        "http://localhost:9981/for/research",
+        "http://localhost:9981/security",
     ]
     deny_domains: List[str] = []
@@ -13,34 +13,34 @@ from twisted.python.failure import Failure
 
 EXCLUDED_URLS = [
     # Google calendar returns 404s on HEAD requests unconditionally
-    'https://calendar.google.com/calendar/embed?src=ktiduof4eoh47lmgcl2qunnc0o@group.calendar.google.com',
+    "https://calendar.google.com/calendar/embed?src=ktiduof4eoh47lmgcl2qunnc0o@group.calendar.google.com",
     # Returns 409 errors to HEAD requests frequently
-    'https://medium.freecodecamp.org/',
+    "https://medium.freecodecamp.org/",
     # Returns 404 to HEAD requests unconditionally
-    'https://www.git-tower.com/blog/command-line-cheat-sheet/',
-    'https://marketplace.visualstudio.com/items?itemName=rafaelmaiolla.remote-vscode',
+    "https://www.git-tower.com/blog/command-line-cheat-sheet/",
+    "https://marketplace.visualstudio.com/items?itemName=rafaelmaiolla.remote-vscode",
     # Requires authentication
-    'https://circleci.com/gh/zulip/zulip/tree/master',
-    'https://circleci.com/gh/zulip/zulip/16617',
-    'https://www.linkedin.com/company/zulip-project',
+    "https://circleci.com/gh/zulip/zulip/tree/master",
+    "https://circleci.com/gh/zulip/zulip/16617",
+    "https://www.linkedin.com/company/zulip-project",
     # Returns 403 errors to HEAD requests
-    'https://giphy.com',
-    'https://giphy.com/apps/giphycapture',
-    'https://www.udemy.com/course/the-complete-react-native-and-redux-course/',
+    "https://giphy.com",
+    "https://giphy.com/apps/giphycapture",
+    "https://www.udemy.com/course/the-complete-react-native-and-redux-course/",
 ]
 
 VNU_IGNORE = [
     # Real errors that should be fixed.
-    r'Duplicate ID “[^”]*”\.',
-    r'The first occurrence of ID “[^”]*” was here\.',
-    r'Attribute “markdown” not allowed on element “div” at this point\.',
-    r'No “p” element in scope but a “p” end tag seen\.',
-    r'Element “div” not allowed as child of element “ul” in this context\. '
-    + r'\(Suppressing further errors from this subtree\.\)',
+    r"Duplicate ID “[^”]*”\.",
+    r"The first occurrence of ID “[^”]*” was here\.",
+    r"Attribute “markdown” not allowed on element “div” at this point\.",
+    r"No “p” element in scope but a “p” end tag seen\.",
+    r"Element “div” not allowed as child of element “ul” in this context\. "
+    + r"\(Suppressing further errors from this subtree\.\)",
     # Warnings that are probably less important.
-    r'The “type” attribute is unnecessary for JavaScript resources\.',
+    r"The “type” attribute is unnecessary for JavaScript resources\.",
 ]
-VNU_IGNORE_REGEX = re.compile(r'|'.join(VNU_IGNORE))
+VNU_IGNORE_REGEX = re.compile(r"|".join(VNU_IGNORE))
 
 DEPLOY_ROOT = os.path.abspath(os.path.join(__file__, "../../../../../.."))
 
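A note on the VNU_IGNORE_REGEX line above: joining the patterns with "|" builds one alternation, and fullmatch() later requires an entire validator message to match one alternative before it is suppressed. A small self-contained sketch of that mechanism, with a shortened pattern list for illustration:

import re

VNU_IGNORE = [
    r"Duplicate ID “[^”]*”\.",
    r"The “type” attribute is unnecessary for JavaScript resources\.",
]
VNU_IGNORE_REGEX = re.compile(r"|".join(VNU_IGNORE))

# fullmatch: the whole message must match one alternative to be ignored.
assert VNU_IGNORE_REGEX.fullmatch("Duplicate ID “main”.")
assert not VNU_IGNORE_REGEX.fullmatch("Unexpected “div” in this context.")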
@@ -54,15 +54,15 @@ class BaseDocumentationSpider(scrapy.Spider):
     deny_domains: List[str] = []
     start_urls: List[str] = []
     deny: List[str] = []
-    file_extensions: List[str] = ['.' + ext for ext in IGNORED_EXTENSIONS]
-    tags = ('a', 'area', 'img')
-    attrs = ('href', 'src')
+    file_extensions: List[str] = ["." + ext for ext in IGNORED_EXTENSIONS]
+    tags = ("a", "area", "img")
+    attrs = ("href", "src")
 
     def _has_extension(self, url: str) -> bool:
         return url_has_any_extension(url, self.file_extensions)
 
     def _is_external_url(self, url: str) -> bool:
-        return url.startswith('http') or self._has_extension(url)
+        return url.startswith("http") or self._has_extension(url)
 
     def check_existing(self, response: Response) -> None:
         self.log(response)
@@ -85,7 +85,7 @@ class BaseDocumentationSpider(scrapy.Spider):
         ):
             # We can verify these links directly in the local git repo without making any requests to GitHub servers.
             return False
-        if 'github.com/zulip' in url:
+        if "github.com/zulip" in url:
             # We want to check these links but due to rate limiting from GitHub, these checks often
             # fail in the CI. Thus, we should treat these as external links for now.
             # TODO: Figure out how to test github.com/zulip links in CI.
@@ -98,7 +98,7 @@
         m = re.match(r".+\#(?P<fragment>.*)$", response.request.url)  # Get fragment value.
         if not m:
             return
-        fragment = m.group('fragment')
+        fragment = m.group("fragment")
         # Check fragment existing on response page.
         if not response.selector.xpath(xpath_template.format(fragment=fragment)):
             self.logger.error(
@@ -108,17 +108,17 @@
     def _vnu_callback(self, url: str) -> Callable[[Response], None]:
         def callback(response: Response) -> None:
             vnu_out = json.loads(response.text)
-            for message in vnu_out['messages']:
-                if not VNU_IGNORE_REGEX.fullmatch(message['message']):
+            for message in vnu_out["messages"]:
+                if not VNU_IGNORE_REGEX.fullmatch(message["message"]):
                     self.logger.error(
                         '"%s":%d.%d-%d.%d: %s: %s',
                         url,
-                        message.get('firstLine', message['lastLine']),
-                        message.get('firstColumn', message['lastColumn']),
-                        message['lastLine'],
-                        message['lastColumn'],
-                        message['type'],
-                        message['message'],
+                        message.get("firstLine", message["lastLine"]),
+                        message.get("firstColumn", message["lastColumn"]),
+                        message["lastLine"],
+                        message["lastColumn"],
+                        message["type"],
+                        message["message"],
                     )
 
         return callback
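Note that the format string '"%s":%d.%d-%d.%d: %s: %s' in this hunk stays single-quoted: Black only switches a string to double quotes when that adds no escaping, so literals that themselves contain a double quote keep their single quotes. A tiny illustration of the rule:

plain = 'messages'    # Black rewrites this to "messages"
quoted = '"%s": %s'   # left alone: double quotes would force \" escapes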
@@ -129,18 +129,18 @@
         # crawl documentation served by the webapp (E.g. /help/), we
         # don't want to crawl the webapp itself, so we exclude these.
         if (
-            url in ['http://localhost:9981/', 'http://localhost:9981']
-            or url.startswith('http://localhost:9981/#')
-            or url.startswith('http://localhost:9981#')
+            url in ["http://localhost:9981/", "http://localhost:9981"]
+            or url.startswith("http://localhost:9981/#")
+            or url.startswith("http://localhost:9981#")
         ):
             return
 
         callback: Callable[[Response], Optional[Iterator[Request]]] = self.parse
         dont_filter = False
-        method = 'GET'
+        method = "GET"
         if self._is_external_url(url):
             callback = self.check_existing
-            method = 'HEAD'
+            method = "HEAD"
 
         if url.startswith(ZULIP_SERVER_GITHUB_FILE_URL_PREFIX):
             file_path = url.replace(ZULIP_SERVER_GITHUB_FILE_URL_PREFIX, DEPLOY_ROOT)
@@ -159,10 +159,10 @@
                 "There is no local directory associated with the GitHub URL: %s", url
             )
             return
-        elif '#' in url:
+        elif "#" in url:
             dont_filter = True
             callback = self.check_fragment
-        if getattr(self, 'skip_external', False) and self._is_external_link(url):
+        if getattr(self, "skip_external", False) and self._is_external_link(url):
             return
         yield Request(
             url,
@@ -179,11 +179,11 @@
     def parse(self, response: Response) -> Iterator[Request]:
         self.log(response)
 
-        if getattr(self, 'validate_html', False):
+        if getattr(self, "validate_html", False):
             yield Request(
-                'http://127.0.0.1:9988/?out=json',
-                method='POST',
-                headers={'Content-Type': response.headers['Content-Type']},
+                "http://127.0.0.1:9988/?out=json",
+                method="POST",
+                headers={"Content-Type": response.headers["Content-Type"]},
                 body=response.body,
                 callback=self._vnu_callback(response.url),
                 errback=self.error_callback,
@@ -191,7 +191,7 @@
 
         for link in LxmlLinkExtractor(
             deny_domains=self.deny_domains,
-            deny_extensions=['doc'],
+            deny_extensions=["doc"],
             tags=self.tags,
             attrs=self.attrs,
             deny=self.deny,
@@ -200,7 +200,7 @@
             yield from self._make_requests(link.url)
 
     def retry_request_with_get(self, request: Request) -> Iterator[Request]:
-        request.method = 'GET'
+        request.method = "GET"
         request.dont_filter = True
         yield request
 
@@ -212,7 +212,7 @@
         response = failure.value.response
         if self.exclude_error(response.url):
             return None
-        if response.status == 405 and response.request.method == 'HEAD':
+        if response.status == 405 and response.request.method == "HEAD":
             # Method 'HEAD' not allowed, repeat request with 'GET'
             return self.retry_request_with_get(response.request)
         self.logger.error("Please check link: %s", response.request.url)