fix: escape HTML in descriptions before link transformation
Fixes an HTML injection vulnerability in generate_dynamic_docs.py where description text was not HTML-escaped before being inserted into HTML output. Special characters like <, >, & could be interpreted as HTML.

Changes:
- Modified transform_doc_links() to escape all non-link text when format='html' or format='web'
- Link text and hrefs are also properly escaped
- Non-HTML formats (markdown, python) remain unchanged
- Added proper handling for descriptions with mixed plain text and links

The fix splits docstrings into link and non-link segments, escapes non-link segments, and properly escapes content within link patterns.

Tested with comprehensive test suite covering:
- Basic HTML special characters
- Special chars with links
- Special chars in link text
- Multiple links with special chars

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
fac6a9a457
commit
7f253da581
2 changed files with 75 additions and 25 deletions
|
|
@ -17,6 +17,8 @@ def transform_doc_links(docstring, format='html', base_url=''):
|
|||
|
||||
Detects pattern: "See also: TEXT (docs/path.md)"
|
||||
Transforms to appropriate format for output type.
|
||||
|
||||
For HTML/web formats, properly escapes content before inserting HTML tags.
|
||||
"""
|
||||
if not docstring:
|
||||
return docstring
|
||||
|
|
@ -27,14 +29,17 @@ def transform_doc_links(docstring, format='html', base_url=''):
|
|||
text, ref = match.group(1).strip(), match.group(2).strip()
|
||||
|
||||
if format == 'html':
|
||||
# Convert docs/foo.md → foo.html
|
||||
href = ref.replace('docs/', '').replace('.md', '.html')
|
||||
return f'<p class="see-also">See also: <a href="{href}">{text}</a></p>'
|
||||
# Convert docs/foo.md → foo.html and escape for safe HTML
|
||||
href = html.escape(ref.replace('docs/', '').replace('.md', '.html'), quote=True)
|
||||
text_escaped = html.escape(text)
|
||||
return f'<p class="see-also">See also: <a href="{href}">{text_escaped}</a></p>'
|
||||
|
||||
elif format == 'web':
|
||||
# Link to hosted docs
|
||||
# Link to hosted docs and escape for safe HTML
|
||||
web_path = ref.replace('docs/', '').replace('.md', '')
|
||||
return f'<p class="see-also">See also: <a href="{base_url}/{web_path}">{text}</a></p>'
|
||||
href = html.escape(f"{base_url}/{web_path}", quote=True)
|
||||
text_escaped = html.escape(text)
|
||||
return f'<p class="see-also">See also: <a href="{href}">{text_escaped}</a></p>'
|
||||
|
||||
elif format == 'markdown':
|
||||
# Markdown link
|
||||
|
|
@ -44,7 +49,29 @@ def transform_doc_links(docstring, format='html', base_url=''):
|
|||
# Keep as plain text for Python docstrings
|
||||
return match.group(0)
|
||||
|
||||
return re.sub(link_pattern, replace_link, docstring)
|
||||
# For HTML formats, escape only the non-link text here; replace_link escapes the link text and href itself
|
||||
if format in ('html', 'web'):
|
||||
# Split by the link pattern, escape non-link parts, then reassemble
|
||||
parts = []
|
||||
last_end = 0
|
||||
|
||||
for match in re.finditer(link_pattern, docstring):
|
||||
# Escape the text before this match
|
||||
if match.start() > last_end:
|
||||
parts.append(html.escape(docstring[last_end:match.start()]))
|
||||
|
||||
# Process the link (replace_link handles escaping internally)
|
||||
parts.append(replace_link(match))
|
||||
last_end = match.end()
|
||||
|
||||
# Escape any remaining text after the last match
|
||||
if last_end < len(docstring):
|
||||
parts.append(html.escape(docstring[last_end:]))
|
||||
|
||||
return ''.join(parts)
|
||||
else:
|
||||
# For non-HTML formats, just do simple replacement
|
||||
return re.sub(link_pattern, replace_link, docstring)
|
||||
|
||||
# Must be run with McRogueFace as interpreter
|
||||
try:
|
||||
|
|
@ -339,8 +366,9 @@ def generate_html_docs():
|
|||
<div class="method-section">
|
||||
<h3><code class="function-signature">{func_name}{parsed['signature'] if parsed['signature'] else '(...)'}</code></h3>
|
||||
"""
|
||||
description = transform_doc_links(parsed['description'], format='html')
|
||||
html_content += f" <p>{description}</p>\n"
|
||||
if parsed['description']:
|
||||
description = transform_doc_links(parsed['description'], format='html')
|
||||
html_content += f" <p>{description}</p>\n"
|
||||
|
||||
if parsed['args']:
|
||||
html_content += " <h4>Arguments:</h4>\n <ul>\n"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue