Files
zulip/tools/documentation_crawler/documentation_crawler/spiders/check_documentation.py
2024-07-13 22:28:22 -07:00

23 lines
630 B
Python

import os
import pathlib
from .common.spiders import BaseDocumentationSpider
def get_start_url() -> list[str]:
    """Return the crawl entry points as a one-element list of file URIs.

    The single entry is the ``docs/_build/html/index.html`` file located
    four directory levels above the directory containing this module,
    expressed as an absolute ``file://`` URI.
    """
    # Resolve symlinks first so the relative climb starts from the real
    # on-disk location of this module.
    module_dir = os.path.dirname(os.path.realpath(__file__))
    four_levels_up = os.path.join(os.pardir, os.pardir, os.pardir, os.pardir)
    # abspath collapses the ".." components; as_uri() needs an absolute path.
    index_html = os.path.abspath(
        os.path.join(module_dir, four_levels_up, "docs/_build/html/index.html")
    )
    return [pathlib.Path(index_html).as_uri()]
class DocumentationSpider(BaseDocumentationSpider):
    """Spider configuration for crawling the locally built documentation.

    All behavior comes from ``BaseDocumentationSpider``; this subclass only
    sets class-level configuration values.
    """

    name = "documentation_crawler"

    # Evaluated once, when the class body executes (i.e. at import time).
    start_urls = get_start_url()

    # Regex matching "_sources/<anything>.txt" paths.
    # NOTE(review): presumably read by the base class to skip such links --
    # confirm against BaseDocumentationSpider.
    deny = [r"\_sources\/.*\.txt"]

    # NOTE(review): presumably hosts the base class refuses to follow --
    # confirm against BaseDocumentationSpider.
    deny_domains = ["localhost:9991"]