Fix Docs plaintext link scan (#14583)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
This commit is contained in:
Glenn Jocher 2024-07-22 14:03:16 +02:00 committed by GitHub
parent 3b81b95e1c
commit 80e311a07e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 8 additions and 2 deletions

View file

@ -52,6 +52,7 @@ jobs:
  --exclude-path docs/hi \
  --exclude-path docs/ar \
  --github-token ${{ secrets.GITHUB_TOKEN }} \
+ --header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \
  './**/*.md' \
  './**/*.html'
@ -82,6 +83,7 @@ jobs:
  --exclude-path docs/hi \
  --exclude-path docs/ar \
  --github-token ${{ secrets.GITHUB_TOKEN }} \
+ --header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \
  './**/*.md' \
  './**/*.html' \
  './**/*.yml' \

View file

@ -192,7 +192,11 @@ def convert_plaintext_links_to_html(content):
  for paragraph in main_content.find_all(["p", "li"]): # Focus on paragraphs and list items
  for text_node in paragraph.find_all(string=True, recursive=False):
  if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks
- new_text = re.sub(r"(https?://\S+?)(?=[,.!?;:]?\s|[,.!?;:]?$)", r'<a href="\1">\1</a>', str(text_node))
+ new_text = re.sub(
+ r'(https?://[^\s\'")]+)(?=[,.!?;:]?(?:\s|$)|[\'")])',
+ r'<a href="\1">\1</a>',
+ str(text_node),
+ )
  if "<a" in new_text:
  new_soup = BeautifulSoup(new_text, "html.parser")
  text_node.replace_with(new_soup)

View file

@ -199,7 +199,7 @@ def check_disk_space(url="https://ultralytics.com/assets/coco8.zip", path=Path.c
  Check if there is sufficient disk space to download and store a file.
  Args:
- url (str, optional): The URL to the file. Defaults to 'https://github.com/ultralytics/assets/releases/download/v0.0.0/coco8.zip'.
+ url (str, optional): The URL to the file. Defaults to 'https://ultralytics.com/assets/coco8.zip'.
  path (str | Path, optional): The path or drive to check the available free space on.
  sf (float, optional): Safety factor, the multiplier for the required free space. Defaults to 2.0.
  hard (bool, optional): Whether to throw an error or not on insufficient disk space. Defaults to True.