Plaintext negative lookbehind scan (#14601)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Glenn Jocher 2024-07-22 16:43:09 +02:00 committed by GitHub
parent 80e311a07e
commit 22a44d82c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -193,7 +193,7 @@ def convert_plaintext_links_to_html(content):
for text_node in paragraph.find_all(string=True, recursive=False):
if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks
new_text = re.sub(
r'(https?://[^\s\'")]+)(?=[,.!?;:]?(?:\s|$)|[\'")])',
r'(https?://[^\s()<>]+(?:\.[^\s()<>]+)+)(?<![.,:;\'"])',
r'<a href="\1">\1</a>',
str(text_node),
)