"""
Python-Markdown extensions that process a document and add ANSI color codes to
the output.
This module provides a set of extensions for python-markdown that render
markdown elements using ANSI escape codes suitable for terminal display.
"""
import re
from typing import Dict, List, Optional
from xml.etree import ElementTree as etree
import markdown
from markdown.extensions import Extension
from markdown.postprocessors import Postprocessor
from markdown.preprocessors import Preprocessor
from markdown.treeprocessors import Treeprocessor
from command_line_assistant.rendering.colors import Style, colorize, stylize
from command_line_assistant.rendering.formatting import wrap
from command_line_assistant.rendering.theme import Theme
# Constants
# Lower bound applied to every table column width computed by
# ANSIRenderer._calculate_column_widths().
MIN_TABLE_COLUMN_WIDTH = 3
# Extra width added to the widest cell of a column when sizing that column.
TABLE_CELL_PADDING = 2
# Number of "─" characters drawn by ANSIRenderer.horizontal_rule().
HORIZONTAL_RULE_LENGTH = 60
# Default value of the ``index`` parameter of ANSIRenderer.list_item().
DEFAULT_LIST_INDEX = 1
class ANSIRenderer:
    """Base ANSI renderer that provides common formatting utilities.

    Each public method takes already-extracted text and returns a string
    decorated for terminal display. Colors are looked up on ``self.theme``;
    text styles come from ``Style``.
    """

    # Matches ANSI escape sequences (two-character escapes and CSI sequences).
    # Compiled once here instead of on every ``_strip_ansi`` call.
    _ANSI_ESCAPE_RE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    def __init__(self, theme: Optional[Theme] = None):
        """Initialize the ANSI renderer with a theme.

        Args:
            theme: Theme instance to use for colors. If None, uses default theme.
        """
        self.theme = theme or Theme()

    def bold(self, text: str) -> str:
        """Format text as bold."""
        return stylize(text, Style.BOLD)

    def italic(self, text: str) -> str:
        """Format text as italic."""
        return stylize(text, Style.ITALIC)

    def underline(self, text: str) -> str:
        """Format text as underlined."""
        return stylize(text, Style.UNDERLINE)

    def strikethrough(self, text: str) -> str:
        """Format text as strikethrough."""
        return stylize(text, Style.STRIKETHROUGH)

    def code_inline(self, text: str) -> str:
        """Format inline code using the theme's inline-code color."""
        return colorize(text, self.theme.inline_code)

    def code_block(self, text: str, language: str = "") -> str:
        """Format a code block framed by a header and a footer rule.

        Args:
            text: The code to render. Trailing whitespace is removed first.
            language: Optional language name; when given, the header shows
                ``" <language> snippet "`` after a short rule.

        Returns:
            The header, the colorized code lines and the footer, each on
            their own line, wrapped in a leading and a trailing newline.
        """
        source_lines = text.rstrip().split("\n")
        body = "\n".join(
            colorize(line, self.theme.code_block_line) for line in source_lines
        )
        longest_line_length = max(len(line) for line in source_lines)
        border_color = self.theme.code_block_border

        if language:
            lang_text = f" {language} snippet "
            # Clamp at zero: when the label is wider than the code, a negative
            # padding would make the footer shorter than the header.
            padding = max(0, longest_line_length - len(lang_text) + 6)
            # Color the border and language name separately
            header = (
                colorize("──", border_color)
                + colorize(lang_text, self.theme.header)
                + colorize("─" * padding, border_color)
            )
            # Two leading rule characters + label + trailing rule.
            footer_length = 2 + len(lang_text) + padding
        else:
            footer_length = longest_line_length + 8
            header = colorize("─" * footer_length, border_color)

        footer = colorize("─" * footer_length, border_color)
        return f"\n{header}\n{body}\n{footer}\n"

    def link(self, text: str, url: str, title: str = "") -> str:
        """Format a link as ``text (url)`` or ``text (url, title)``.

        Both the link text and the URL are colored with the theme's link color;
        the title, when present, is appended uncolored.
        """
        link_text = colorize(text, self.theme.link)
        link_url = colorize(url, self.theme.link)
        if title:
            return f"{link_text} ({link_url}, {title})"
        return f"{link_text} ({link_url})"

    def image(self, alt_text: str, url: str, title: str = "") -> str:
        """Format image (as text representation).

        Args:
            alt_text: Alternative text shown inside ``[Image: ...]``.
            url: Image URL, colored with the theme's image color.
            title: Accepted for signature parity with ``link``; it is not
                included in the output.
        """
        return f"[Image: {alt_text}] ({colorize(url, self.theme.image)})"

    def blockquote(self, text: str) -> str:
        """Format blockquote by prefixing every line of the stripped text with ``│ ``."""
        return "\n".join(f"│ {line}" for line in text.strip().split("\n"))

    def list_item(
        self, text: str, ordered: bool = False, index: int = DEFAULT_LIST_INDEX
    ) -> str:
        """Format list item.

        Args:
            text: Item content.
            ordered: When True the marker is ``"<index>."``, otherwise ``"•"``.
            index: Number shown for ordered items.
        """
        marker = f"{index}." if ordered else "•"
        return f"{marker} {text}"

    def horizontal_rule(self) -> str:
        """Format horizontal rule as a colored line surrounded by newlines."""
        rule = colorize("─" * HORIZONTAL_RULE_LENGTH, self.theme.horizontal_rule)
        return f"\n{rule}\n"

    def _calculate_column_widths(self, rows: List[List[str]]) -> List[int]:
        """Calculate the width of each column in the table.

        The number of columns is taken from the first row. Cell widths are
        measured with ANSI escape codes removed.

        Args:
            rows: Table rows, each a list of (possibly ANSI-colored) cell texts.

        Returns:
            One width per column of the first row; an empty list when ``rows``
            is empty.
        """
        if not rows:
            # Nothing to measure; avoids an IndexError on rows[0].
            return []

        col_widths = []
        for col in range(len(rows[0])):
            # Rows shorter than the first row simply do not contribute.
            widest = max(
                (len(self._strip_ansi(row[col])) for row in rows if col < len(row)),
                default=0,
            )
            col_widths.append(
                max(MIN_TABLE_COLUMN_WIDTH, widest + TABLE_CELL_PADDING)
            )
        return col_widths

    def _create_table_border(self, col_widths: List[int], border_type: str) -> str:
        """Create a table border line (top, separator, or bottom).

        Args:
            col_widths: Width of each column, in characters.
            border_type: One of ``"top"``, ``"separator"`` or ``"bottom"``.

        Returns:
            The border line built from box-drawing characters.

        Raises:
            KeyError: If ``border_type`` is not one of the supported names.
        """
        border_chars = {
            "top": ("┌", "┬", "┐", "─"),
            "separator": ("├", "┼", "┤", "─"),
            "bottom": ("└", "┴", "┘", "─"),
        }
        left, middle, right, horizontal = border_chars[border_type]
        segments = (horizontal * width for width in col_widths)
        return left + middle.join(segments) + right

    def _strip_ansi(self, text: str) -> str:
        """Strip ANSI escape codes from text for width calculation."""
        return self._ANSI_ESCAPE_RE.sub("", text)
class FencedCodePreprocessor(Preprocessor):
    """Preprocessor that handles fenced code blocks before markdown splits text into blocks.

    This processor identifies fenced code blocks and stores them with unique markers,
    preventing markdown from processing the code content. Each block is replaced by an
    HTML-stash placeholder wrapping a ``<!--CODEBLOCKn-->`` comment; the stored code is
    looked up again through the shared ``md._code_blocks`` mapping.
    """

    # Pattern to match fenced code blocks (with optional leading whitespace)
    # NOTE(review): the trailing ``\s*`` can also match the newline after the
    # closing fence, which swallows one following blank line — confirm whether
    # ``[ \t]*`` was intended.
    FENCED_BLOCK_RE = re.compile(
        r"^[ \t]*```(?P<lang>[\w#+.-]*)[^\n]*\n"  # Opening fence with optional language
        r"(?P<code>.*?)"  # Code content (non-greedy)
        r"^[ \t]*```\s*$",  # Closing fence
        re.MULTILINE | re.DOTALL,
    )

    def __init__(self, md: markdown.Markdown):
        """Initialize the preprocessor.

        Args:
            md: The Markdown instance
        """
        super().__init__(md)
        # Store code blocks for later retrieval
        # Using dynamic attribute assignment (type: ignore for mypy)
        if not hasattr(md, "_code_blocks"):
            md._code_blocks = {}  # type: ignore[attr-defined]
        self.code_blocks: Dict[str, Dict[str, str]] = md._code_blocks  # type: ignore[attr-defined]
        self.counter = 0

    def run(self, lines: List[str]) -> List[str]:
        """Process fenced code blocks in the text.

        Args:
            lines: List of text lines to process

        Returns:
            List of processed lines with code blocks replaced by markers
        """
        # Forget blocks from any previous document so a reused Markdown
        # instance does not accumulate them forever. The dict is cleared in
        # place because other processors hold a reference to the same object.
        self.code_blocks.clear()
        self.counter = 0

        # Join lines into full text for regex matching
        text = "\n".join(lines)

        def replace_code_block(match: "re.Match[str]") -> str:
            """Replace a matched code block with a marker."""
            lang = match.group("lang") or ""
            # Detect and remove base indentation from code
            code = self._dedent_code(match.group("code"), match.group(0))

            # Generate unique marker
            marker_id = f"CODEBLOCK{self.counter}"
            self.counter += 1

            # Store code block data
            self.code_blocks[marker_id] = {"lang": lang, "code": code}

            # Stash the marker as raw HTML so markdown leaves it untouched and
            # hands back a placeholder to put in the document.
            code_html = self._create_code_html(code, lang, marker_id)
            return self.md.htmlStash.store(code_html)

        # Replace all fenced code blocks
        text = self.FENCED_BLOCK_RE.sub(replace_code_block, text)

        # Return as lines
        return text.split("\n")

    def _dedent_code(self, code: str, full_match: str) -> str:
        """Remove base indentation from code block content.

        The base indentation is the number of leading whitespace characters on
        the opening fence line. That many characters are removed from every
        code line that is indented at least that far; lines indented less lose
        all of their leading whitespace, and whitespace-only lines that do not
        start with the uniform indent become empty.

        Args:
            code: The code content
            full_match: The full matched text including fences

        Returns:
            Code with base indentation removed
        """
        # Find the indentation of the opening fence
        first_line = full_match.split("\n")[0]
        base_indent = len(first_line) - len(first_line.lstrip())
        if base_indent == 0:
            return code

        uniform_prefixes = (" " * base_indent, "\t" * base_indent)
        dedented_lines = []
        for line in code.split("\n"):
            if line.startswith(uniform_prefixes):
                # Indented with spaces only or tabs only: cut the base indent.
                dedented_lines.append(line[base_indent:])
                continue

            stripped = line.lstrip()
            if not stripped:
                # Preserve empty lines
                dedented_lines.append("")
                continue

            leading = len(line) - len(stripped)
            if leading > base_indent:
                # Mixed tabs/spaces but deeper than the fence: remove only the
                # base indent so the code keeps its relative indentation.
                dedented_lines.append(line[base_indent:])
            else:
                # Line has no more indentation than the base: drop all of it.
                dedented_lines.append(stripped)
        return "\n".join(dedented_lines)

    def _create_code_html(self, code: str, language: str, marker_id: str) -> str:
        """Create a simple marker for the code block.

        Args:
            code: The code content (not used to build the marker)
            language: The programming language (not used to build the marker)
            marker_id: Unique marker ID for retrieval

        Returns:
            An HTML comment of the form ``<!--<marker_id>-->``
        """
        return f"<!--{marker_id}-->"
class CodeBlockPostprocessor(Postprocessor):
    """Postprocessor that replaces code block markers with ANSI formatted code."""

    # Matches the ``<!--CODEBLOCKn-->`` markers and captures the marker ID.
    _MARKER_RE = re.compile(r"<!--(CODEBLOCK\d+)-->")

    def __init__(self, md: markdown.Markdown, renderer: ANSIRenderer):
        """Initialize the postprocessor.

        Args:
            md: The Markdown instance
            renderer: ANSI renderer for formatting code blocks
        """
        super().__init__(md)
        self.renderer = renderer

        # Share the mapping stored on the Markdown instance. Create it when it
        # does not exist yet, so the lookup still works if this processor is
        # constructed before the code that fills ``md._code_blocks``.
        code_blocks = getattr(md, "_code_blocks", None)
        if code_blocks is None:
            code_blocks = {}
            if md is not None:
                md._code_blocks = code_blocks  # type: ignore[attr-defined]
        self.code_blocks = code_blocks

    def run(self, text: str) -> str:
        """Replace code block markers with ANSI formatted code.

        The text is scanned once, so the rendered code that is inserted is
        never re-examined for markers. Markers whose ID is not present in
        ``self.code_blocks`` are left unchanged.

        Args:
            text: The processed markdown text

        Returns:
            Text with code blocks rendered as ANSI
        """

        def render_marker(match: "re.Match[str]") -> str:
            """Return the formatted code for one marker, or the marker itself."""
            block_data = self.code_blocks.get(match.group(1))
            if block_data is None:
                return match.group(0)
            return self.renderer.code_block(block_data["code"], block_data["lang"])

        return self._MARKER_RE.sub(render_marker, text)
class ANSITreeProcessor(Treeprocessor):
    """Tree processor that converts HTML elements to ANSI formatted text.

    The whole element tree is flattened into a single string that becomes the
    text of a ``div`` root element.
    """

    def __init__(self, md: markdown.Markdown, renderer: ANSIRenderer):
        """Initialize the tree processor.

        Args:
            md: The Markdown instance
            renderer: ANSI renderer used to format elements
        """
        super().__init__(md)
        self.renderer = renderer
        # Child element -> parent element, rebuilt for every document.
        self.parent_map: Dict[etree.Element, etree.Element] = {}
        # Per-element counters; the code that uses them is not in this excerpt.
        self.list_counters: Dict[etree.Element, int] = {}
        self._setup_tag_formatters()

    def run(self, root: etree.Element) -> None:
        """Process the element tree and convert to ANSI text.

        Args:
            root: Root of the element tree; it is modified in place.
        """
        # Drop state left over from a previous document. Both dicts are keyed
        # by element objects, so stale entries would keep old trees alive.
        self.parent_map.clear()
        self.list_counters.clear()

        # First pass: set up parent relationships
        self._set_parent_relationships(root)
        text = self._process_element(root)

        # Replace the root with a single text node
        root.clear()
        root.text = text
        root.tag = "div"  # Use a neutral container

    def _set_parent_relationships(
        self, elem: etree.Element, parent: Optional[etree.Element] = None
    ) -> None:
        """Set parent relationships for all elements.

        Args:
            elem: Element whose subtree is recorded.
            parent: Parent of ``elem``; None for the root, which is not recorded.
        """
        if parent is not None:
            self.parent_map[elem] = parent
        for child in elem:
            self._set_parent_relationships(child, elem)

    def _process_element(self, elem: etree.Element) -> str:
        """Process a single element and its children.

        Args:
            elem: The element to convert.

        Returns:
            The formatted text for ``elem`` (its own text plus its processed
            children, passed through ``_format_by_tag``) followed by its tail.
        """
        tag = elem.tag.lower()
        text = elem.text or ""
        tail = elem.tail or ""

        # Process children first
        child_text = "".join(self._process_element(child) for child in elem)

        # Apply formatting based on tag
        formatted = self._format_by_tag(tag, elem, text + child_text)
        return formatted + tail
# Text formatting methods
# Block element methods
# List methods
# Table methods
# Link and media methods
# Other methods
class ANSIExtension(Extension):
    """Main Python-Markdown extension that provides ANSI terminal output.

    It wires three processors into a Markdown instance: a preprocessor that
    pulls fenced code blocks out of the text, a tree processor that turns the
    parsed document into ANSI text, and a postprocessor that puts the rendered
    code blocks back.
    """

    def __init__(self, theme: Optional[Theme] = None, **kwargs):
        """Create the extension with a renderer built from ``theme``.

        Args:
            theme: Theme passed to ``ANSIRenderer``; may be None.
            **kwargs: Configuration options forwarded to ``Extension``.
        """
        ansi_renderer = ANSIRenderer(theme)
        self.config = {
            "renderer": [ansi_renderer, "ANSI renderer instance"],
        }
        super().__init__(**kwargs)

    def extendMarkdown(self, md: markdown.Markdown) -> None:
        """Register the ANSI processors.

        Args:
            md: The Markdown instance to extend.
        """
        ansi_renderer = self.getConfig("renderer")

        # Fenced code blocks are extracted from the raw text first. This
        # processor is built before the postprocessor because its constructor
        # creates ``md._code_blocks``, which the postprocessor reads.
        md.preprocessors.register(FencedCodePreprocessor(md), "fenced_code_block", 25)

        # Convert the parsed HTML tree to ANSI text.
        md.treeprocessors.register(ANSITreeProcessor(md, ansi_renderer), "ansi", 0)

        # Finally swap the code block markers for ANSI formatted code.
        md.postprocessors.register(
            CodeBlockPostprocessor(md, ansi_renderer), "code_blocks", 15
        )
# Convenience functions
def markdown_to_ansi(text: str, theme: Optional[Theme] = None, **kwargs) -> str:
    """Render markdown text as ANSI formatted text in one call.

    A fresh ``ANSIMarkdown`` converter is created for every call.

    Args:
        text: Markdown text to convert
        theme: Theme instance to use for colors. If None, uses default theme.
        **kwargs: Additional keyword arguments forwarded to the
            ``ANSIMarkdown`` constructor

    Returns:
        ANSI formatted text suitable for terminal display
    """
    return ANSIMarkdown(theme=theme, **kwargs).convert(text)
class ANSIMarkdown(markdown.Markdown):
    """Markdown processor that converts markdown to ANSI formatted text.

    This is a convenience class that can be used in place of a
    markdown.Markdown instance to render markdown to ANSI formatted text
    suitable for terminal display. The ANSI extension and the ``tables``
    extension are always enabled.
    """

    def __init__(self, theme: Optional[Theme] = None, **kwargs):
        """Create the converter.

        Args:
            theme: Theme instance to use for colors. If None, uses default theme.
            **kwargs: Additional keyword arguments for ``markdown.Markdown``.
                An ``extensions`` entry is merged with the built-in extensions
                instead of colliding with them.
        """
        # Passing ``extensions`` through **kwargs unchanged would raise
        # "got multiple values for keyword argument 'extensions'".
        extra_extensions = list(kwargs.pop("extensions", None) or [])
        # Built-in extensions go last so their processor registrations are the
        # ones in effect when a caller extension uses the same name.
        super().__init__(
            extensions=[*extra_extensions, ANSIExtension(theme), "tables"],
            **kwargs,
        )