Update convert_plaintext_links_to_html (#18786)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
This commit is contained in:
parent
28818801cb
commit
617dea8e25
1 changed file with 2 additions and 6 deletions
|
|
@@ -37,6 +37,7 @@ from tqdm import tqdm
|
||||||
os.environ["JUPYTER_PLATFORM_DIRS"] = "1" # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs
|
os.environ["JUPYTER_PLATFORM_DIRS"] = "1" # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs
|
||||||
DOCS = Path(__file__).parent.resolve()
|
DOCS = Path(__file__).parent.resolve()
|
||||||
SITE = DOCS.parent / "site"
|
SITE = DOCS.parent / "site"
|
||||||
|
LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])")
|
||||||
|
|
||||||
|
|
||||||
def create_vercel_config():
|
def create_vercel_config():
|
||||||
|
|
@@ -72,7 +73,6 @@ def prepare_docs_markdown(clone_repos=True):
|
||||||
|
|
||||||
def update_page_title(file_path: Path, new_title: str):
|
def update_page_title(file_path: Path, new_title: str):
|
||||||
"""Update the title of an HTML file."""
|
"""Update the title of an HTML file."""
|
||||||
# Read the content of the file
|
|
||||||
with open(file_path, encoding="utf-8") as file:
|
with open(file_path, encoding="utf-8") as file:
|
||||||
content = file.read()
|
content = file.read()
|
||||||
|
|
||||||
|
|
@@ -206,11 +206,7 @@ def convert_plaintext_links_to_html(content):
|
||||||
for paragraph in main_content.find_all(["p", "li"]): # Focus on paragraphs and list items
|
for paragraph in main_content.find_all(["p", "li"]): # Focus on paragraphs and list items
|
||||||
for text_node in paragraph.find_all(string=True, recursive=False):
|
for text_node in paragraph.find_all(string=True, recursive=False):
|
||||||
if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks
|
if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks
|
||||||
new_text = re.sub(
|
new_text = LINK_PATTERN.sub(r'<a href="\1">\1</a>', str(text_node))
|
||||||
r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])",
|
|
||||||
r'<a href="\1">\1</a>',
|
|
||||||
str(text_node),
|
|
||||||
)
|
|
||||||
if "<a href=" in new_text:
|
if "<a href=" in new_text:
|
||||||
# Parse the new text with BeautifulSoup to handle HTML properly
|
# Parse the new text with BeautifulSoup to handle HTML properly
|
||||||
new_soup = BeautifulSoup(new_text, "html.parser")
|
new_soup = BeautifulSoup(new_text, "html.parser")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue