From 80e311a07ed9a1c0ffd0b7c394bc0f04c8a41f22 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 22 Jul 2024 14:03:16 +0200 Subject: [PATCH] Fix Docs plaintext link scan (#14583) Signed-off-by: Glenn Jocher Co-authored-by: UltralyticsAssistant Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> --- .github/workflows/links.yml | 2 ++ docs/build_docs.py | 6 +++++- ultralytics/utils/downloads.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 73542b3a..216250fb 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -52,6 +52,7 @@ jobs: --exclude-path docs/hi \ --exclude-path docs/ar \ --github-token ${{ secrets.GITHUB_TOKEN }} \ + --header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \ './**/*.md' \ './**/*.html' @@ -82,6 +83,7 @@ jobs: --exclude-path docs/hi \ --exclude-path docs/ar \ --github-token ${{ secrets.GITHUB_TOKEN }} \ + --header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \ './**/*.md' \ './**/*.html' \ './**/*.yml' \ diff --git a/docs/build_docs.py b/docs/build_docs.py index 67ec5bd1..a6fd39b0 100644 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -192,7 +192,11 @@ def convert_plaintext_links_to_html(content): for paragraph in main_content.find_all(["p", "li"]): # Focus on paragraphs and list items for text_node in paragraph.find_all(string=True, recursive=False): if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks - new_text = re.sub(r"(https?://\S+?)(?=[,.!?;:]?\s|[,.!?;:]?$)", r'\1', str(text_node)) + new_text = re.sub( + r'(https?://[^\s\'")]+)(?=[,.!?;:]?(?:\s|$)|[\'")])', + r'\1', + str(text_node), + ) if "