Update convert_plaintext_links_to_html (#18786)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
2025-01-21 02:20:26 +01:00 · 2025-01-21 02:20:26 +01:00 · 617dea8e25
commit 617dea8e25
parent 28818801cb
1 changed file with 2 additions and 6 deletions
--- a/docs/build_docs.py
+++ b/docs/build_docs.py
@@ -37,6 +37,7 @@ from tqdm import tqdm
 os.environ["JUPYTER_PLATFORM_DIRS"] = "1"  # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs
 DOCS = Path(__file__).parent.resolve()
 SITE = DOCS.parent / "site"
+LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])")


 def create_vercel_config():
@@ -72,7 +73,6 @@ def prepare_docs_markdown(clone_repos=True):

 def update_page_title(file_path: Path, new_title: str):
    """Update the title of an HTML file."""
-    # Read the content of the file
    with open(file_path, encoding="utf-8") as file:
        content = file.read()

@@ -206,11 +206,7 @@ def convert_plaintext_links_to_html(content):
    for paragraph in main_content.find_all(["p", "li"]):  # Focus on paragraphs and list items
        for text_node in paragraph.find_all(string=True, recursive=False):
            if text_node.parent.name not in {"a", "code"}:  # Ignore links and code blocks
-                new_text = re.sub(
-                    r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])",
-                    r'<a href="\1">\1</a>',
-                    str(text_node),
-                )
+                new_text = LINK_PATTERN.sub(r'<a href="\1">\1</a>', str(text_node))
                if "<a href=" in new_text:
                    # Parse the new text with BeautifulSoup to handle HTML properly
                    new_soup = BeautifulSoup(new_text, "html.parser")