Add missing HTML image alt tags (#6611)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Glenn Jocher 2023-11-27 17:46:29 +01:00 committed by GitHub
parent 4096b261fc
commit 42bcf8c47f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 267 additions and 245 deletions

View file

@ -121,6 +121,27 @@ class MarkdownLinkFixer:
return match.group(0)
@staticmethod
def update_html_tags(content):
"""Updates HTML tags in docs."""
alt_tag = 'MISSING'
# Remove closing slashes from self-closing HTML tags
pattern = re.compile(r'<([^>]+?)\s*/>')
content = re.sub(pattern, r'<\1>', content)
# Find all images without alt tags and add placeholder alt text
pattern = re.compile(r'!\[(.*?)\]\((.*?)\)')
content, num_replacements = re.subn(pattern, lambda match: f'![{match.group(1) or alt_tag}]({match.group(2)})',
content)
# Add missing alt tags to HTML images
pattern = re.compile(r'<img\s+(?!.*?\balt\b)[^>]*src=["\'](.*?)["\'][^>]*>')
content, num_replacements = re.subn(pattern, lambda match: match.group(0).replace('>', f' alt="{alt_tag}">', 1),
content)
return content
def process_markdown_file(self, md_file_path, lang_dir):
"""Process each markdown file in the language directory."""
print(f'Processing file: {md_file_path}')
@ -134,6 +155,7 @@ class MarkdownLinkFixer:
content = self.replace_front_matter(content, lang_dir)
content = self.replace_admonitions(content, lang_dir)
content = self.update_iframe(content)
content = self.update_html_tags(content)
with open(md_file_path, 'w', encoding='utf-8') as file:
file.write(content)