Source code for command_line_assistant.rendering.markdown

"""
Python-Markdown extensions that process a document and add ANSI color codes to
the output.

This module provides a set of extensions for python-markdown that render
markdown elements using ANSI escape codes suitable for terminal display.
"""

import re
from typing import Dict, List, Optional
from xml.etree import ElementTree as etree

import markdown
from markdown.extensions import Extension
from markdown.postprocessors import Postprocessor
from markdown.preprocessors import Preprocessor
from markdown.treeprocessors import Treeprocessor

from command_line_assistant.rendering.colors import Style, colorize, stylize
from command_line_assistant.rendering.formatting import wrap
from command_line_assistant.rendering.theme import Theme

# Constants
# Lower bound applied to the width of every rendered table column.
MIN_TABLE_COLUMN_WIDTH = 3
# Extra width added to the widest cell of a column when sizing table columns.
TABLE_CELL_PADDING = 2
# Number of "─" characters drawn for a horizontal rule.
HORIZONTAL_RULE_LENGTH = 60
# Default number used for an ordered list item when no index is supplied.
DEFAULT_LIST_INDEX = 1


class ANSIRenderer:
    """Base ANSI renderer that provides common formatting utilities.

    Every public method receives plain (or already formatted) text and returns
    a string decorated through ``stylize`` / ``colorize`` and, for block-level
    constructs, framed with box-drawing characters.
    """

    # Matches ANSI escape sequences. Compiled once for the class instead of on
    # every ``_strip_ansi`` call.
    _ANSI_ESCAPE_RE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    # Box-drawing characters per border kind: (left, middle, right, horizontal).
    _TABLE_BORDER_CHARS = {
        "top": ("┌", "┬", "┐", "─"),
        "separator": ("├", "┼", "┤", "─"),
        "bottom": ("└", "┴", "┘", "─"),
    }

    def __init__(self, theme: Optional[Theme] = None):
        """Initialize the ANSI renderer with a theme.

        Args:
            theme: Theme instance to use for colors. If None, uses default theme.
        """
        self.theme = theme or Theme()

    def bold(self, text: str) -> str:
        """Format text as bold."""
        return stylize(text, Style.BOLD)

    def italic(self, text: str) -> str:
        """Format text as italic."""
        return stylize(text, Style.ITALIC)

    def underline(self, text: str) -> str:
        """Format text as underlined."""
        return stylize(text, Style.UNDERLINE)

    def strikethrough(self, text: str) -> str:
        """Format text as strikethrough."""
        return stylize(text, Style.STRIKETHROUGH)

    def code_inline(self, text: str) -> str:
        """Format inline code using the theme's inline-code color."""
        return colorize(text, self.theme.inline_code)

    def code_block(self, text: str, language: str = "") -> str:
        """Format a code block framed by a horizontal rule above and below.

        Args:
            text: The code to render; trailing whitespace is removed.
            language: Optional language name. When given, the top rule embeds
                a "<language> snippet" label.

        Returns:
            The colored code lines surrounded by a header and a footer rule of
            equal visible width, wrapped in leading/trailing newlines.
        """
        code_lines = text.rstrip().split("\n")
        longest_line_length = max(len(line) for line in code_lines)
        colored_lines = [
            colorize(line, self.theme.code_block_line) for line in code_lines
        ]

        if language:
            lang_text = f" {language} snippet "
            # Clamp at zero: when the label is wider than the code, a negative
            # padding used to make the footer shorter than the header.
            padding = max(0, longest_line_length - len(lang_text) + 6)
            # Color the border and language name separately
            header = (
                colorize("──", self.theme.code_block_border)
                + colorize(lang_text, self.theme.header)
                + colorize("─" * padding, self.theme.code_block_border)
            )
            rule_width = 2 + len(lang_text) + padding
        else:
            rule_width = longest_line_length + 8
            header = colorize("─" * rule_width, self.theme.code_block_border)

        # The footer always spans exactly the visible width of the header.
        footer = colorize("─" * rule_width, self.theme.code_block_border)
        return f"\n{header}\n" + "\n".join(colored_lines) + f"\n{footer}\n"

    def header(self, text: str, level: int) -> str:
        """Format a header as ``level`` "#" characters followed by the text."""
        prefix = "#" * level
        return f"\n{colorize(f'{prefix} {text}', self.theme.header)}\n"

    def image(self, alt_text: str, url: str, title: str = "") -> str:
        """Format image (as text representation).

        Args:
            alt_text: Alternative text shown in place of the image.
            url: Image location, rendered in the theme's image color.
            title: Accepted for interface symmetry; not rendered.
        """
        return f"[Image: {alt_text}] ({colorize(url, self.theme.image)})"

    def blockquote(self, text: str) -> str:
        """Format blockquote by prefixing every line of the stripped text with "│ "."""
        return "\n".join(f"│ {line}" for line in text.strip().split("\n"))

    def list_item(
        self, text: str, ordered: bool = False, index: int = DEFAULT_LIST_INDEX
    ) -> str:
        """Format list item.

        Args:
            text: Item content.
            ordered: Use a numeric "<index>." marker instead of a bullet.
            index: Number shown for ordered items.
        """
        marker = f"{index}." if ordered else "•"
        return f"{marker} {text}"

    def horizontal_rule(self) -> str:
        """Format horizontal rule."""
        return (
            f"\n{colorize('─' * HORIZONTAL_RULE_LENGTH, self.theme.horizontal_rule)}\n"
        )

    def format_table(self, rows: List[List[str]], header_row: bool = True) -> str:
        """Format a complete table with proper column alignment.

        The number of columns is taken from the first row. Shorter rows are
        padded with blank cells and extra cells in longer rows are ignored.

        Args:
            rows: Table content, one list of cell strings per row.
            header_row: Treat the first row as a header (colored, followed by
                a separator when more rows exist).

        Returns:
            The boxed table surrounded by newlines, or "" when ``rows`` is empty.
        """
        if not rows:
            return ""

        col_widths = self._calculate_column_widths(rows)
        result = [self._create_table_border(col_widths, "top")]

        for row_idx, row in enumerate(rows):
            formatted_cells = self._format_table_cells(
                row, col_widths, row_idx, header_row
            )
            result.append("│" + "│".join(formatted_cells) + "│")
            # Add separator after header
            if row_idx == 0 and header_row and len(rows) > 1:
                result.append(self._create_table_border(col_widths, "separator"))

        result.append(self._create_table_border(col_widths, "bottom"))
        return "\n" + "\n".join(result) + "\n"

    def _calculate_column_widths(self, rows: List[List[str]]) -> List[int]:
        """Calculate the width of each column in the table.

        Widths are measured on the text with ANSI escape codes removed, then
        increased by TABLE_CELL_PADDING and floored at MIN_TABLE_COLUMN_WIDTH.
        """
        num_cols = len(rows[0])
        col_widths = []
        for col in range(num_cols):
            widest = max(
                (len(self._strip_ansi(row[col])) for row in rows if col < len(row)),
                default=0,
            )
            col_widths.append(max(MIN_TABLE_COLUMN_WIDTH, widest + TABLE_CELL_PADDING))
        return col_widths

    def _create_table_border(self, col_widths: List[int], border_type: str) -> str:
        """Create a table border line (top, separator, or bottom).

        Raises:
            KeyError: If ``border_type`` is not "top", "separator" or "bottom".
        """
        left, middle, right, horizontal = self._TABLE_BORDER_CHARS[border_type]
        return left + middle.join(horizontal * width for width in col_widths) + right

    def _format_table_cells(
        self, row: List[str], col_widths: List[int], row_idx: int, header_row: bool
    ) -> List[str]:
        """Format all cells in a table row, centering each within its column.

        Exactly one formatted cell is produced per entry in ``col_widths`` so
        that every row lines up with the borders: missing cells are rendered
        blank and surplus cells are dropped.
        """
        is_header = row_idx == 0 and header_row
        formatted_cells = []
        for col, width in enumerate(col_widths):
            cell = row[col] if col < len(row) else ""
            # Measure without escape codes so colors do not skew the alignment.
            padding = width - len(self._strip_ansi(cell))
            left_pad = padding // 2
            right_pad = padding - left_pad
            body = colorize(cell, self.theme.header) if is_header else cell
            formatted_cells.append(f"{' ' * left_pad}{body}{' ' * right_pad}")
        return formatted_cells

    def _strip_ansi(self, text: str) -> str:
        """Strip ANSI escape codes from text for width calculation."""
        return self._ANSI_ESCAPE_RE.sub("", text)
class FencedCodePreprocessor(Preprocessor):
    """Preprocessor that lifts fenced code blocks out of the document early.

    Each fenced block is recorded under a unique marker id and replaced in the
    text by an HTML-stash placeholder, so markdown never parses the code
    itself. The recorded blocks are rendered later from the shared registry.
    """

    # Pattern to match fenced code blocks (with optional leading whitespace)
    FENCED_BLOCK_RE = re.compile(
        r"^[ \t]*```(?P<lang>[\w#+.-]*)[^\n]*\n"  # Opening fence with optional language
        r"(?P<code>.*?)"  # Code content (non-greedy)
        r"^[ \t]*```\s*$",  # Closing fence
        re.MULTILINE | re.DOTALL,
    )

    def __init__(self, md: markdown.Markdown):
        """Initialize the preprocessor.

        Args:
            md: The Markdown instance
        """
        super().__init__(md)
        # The registry lives on the Markdown instance (dynamic attribute) so
        # other processors attached to the same instance can read it.
        try:
            registry = md._code_blocks  # type: ignore[attr-defined]
        except AttributeError:
            registry = md._code_blocks = {}  # type: ignore[attr-defined]
        self.code_blocks: Dict[str, Dict[str, str]] = registry
        self.counter = 0

    def run(self, lines: List[str]) -> List[str]:
        """Process fenced code blocks in the text.

        Args:
            lines: List of text lines to process

        Returns:
            List of processed lines with code blocks replaced by placeholders
        """
        # The pattern spans several lines, so match against the joined text.
        document = "\n".join(lines)
        rewritten = self.FENCED_BLOCK_RE.sub(self._stash_code_block, document)
        return rewritten.split("\n")

    def _stash_code_block(self, match: "re.Match") -> str:
        """Record one matched fenced block and return its stash placeholder."""
        language = match.group("lang") or ""
        source = self._dedent_code(match.group("code"), match.group(0))

        marker_id = f"CODEBLOCK{self.counter}"
        self.counter += 1
        self.code_blocks[marker_id] = {"lang": language, "code": source}

        # Store the marker in the HTML stash and hand back the placeholder
        # that stands in for it in the document text.
        marker_html = self._create_code_html(source, language, marker_id)
        return self.md.htmlStash.store(marker_html)

    def _dedent_code(self, code: str, full_match: str) -> str:
        """Remove base indentation from code block content.

        The base indentation is the number of leading whitespace characters on
        the opening fence line. Lines starting with that many spaces (or that
        many tabs) lose exactly that prefix; any other line has all of its
        leading whitespace removed.

        Args:
            code: The code content
            full_match: The full matched text including fences

        Returns:
            Code with base indentation removed
        """
        opening_fence = full_match.partition("\n")[0]
        base_indent = len(opening_fence) - len(opening_fence.lstrip())
        if base_indent == 0:
            return code

        indent_prefixes = (" " * base_indent, "\t" * base_indent)
        return "\n".join(
            line[base_indent:] if line.startswith(indent_prefixes) else line.lstrip()
            for line in code.split("\n")
        )

    def _create_code_html(self, code: str, language: str, marker_id: str) -> str:
        """Create a simple marker for the code block.

        Args:
            code: The code content (not used in the marker)
            language: The programming language (not used in the marker)
            marker_id: Unique marker ID for retrieval

        Returns:
            An HTML comment containing only the marker id
        """
        return f"<!--{marker_id}-->"
class CodeBlockPostprocessor(Postprocessor):
    """Postprocessor that replaces code block markers with ANSI formatted code."""

    def __init__(self, md: markdown.Markdown, renderer: ANSIRenderer):
        """Initialize the postprocessor.

        Args:
            md: The Markdown instance
            renderer: ANSI renderer for formatting code blocks
        """
        super().__init__(md)
        self.renderer = renderer
        # Registry of stored code blocks attached to the Markdown instance;
        # falls back to an empty mapping when none has been attached.
        self.code_blocks = getattr(md, "_code_blocks", {})

    def run(self, text: str) -> str:
        """Replace code block markers with ANSI formatted code.

        Entries whose marker was found and replaced are removed from the
        registry. Otherwise a reused Markdown instance would keep every code
        block it ever saw and re-scan all of them on each conversion.

        Args:
            text: The processed markdown text

        Returns:
            Text with code blocks rendered as ANSI
        """
        # Iterate over a snapshot because consumed entries are deleted below.
        for marker_id, block_data in list(self.code_blocks.items()):
            marker = f"<!--{marker_id}-->"
            if marker not in text:
                continue
            formatted = self.renderer.code_block(
                block_data["code"], block_data["lang"]
            )
            text = text.replace(marker, formatted)
            del self.code_blocks[marker_id]
        return text
class ANSITreeProcessor(Treeprocessor):
    """Tree processor that converts HTML elements to ANSI formatted text.

    The element tree is walked depth-first. Each element's text and already
    formatted children are combined and passed to a per-tag formatter, and the
    whole tree is finally collapsed into a single text-only ``div``.
    """

    # Tags rendered as headings; the digit is the heading level.
    _HEADER_TAGS = frozenset(("h1", "h2", "h3", "h4", "h5", "h6"))

    def __init__(self, md: markdown.Markdown, renderer: ANSIRenderer):
        """Initialize the tree processor.

        Args:
            md: The Markdown instance
            renderer: ANSI renderer used to format every element
        """
        super().__init__(md)
        self.renderer = renderer
        self.parent_map: Dict[etree.Element, etree.Element] = {}
        self.list_counters: Dict[etree.Element, int] = {}
        self._setup_tag_formatters()

    def _setup_tag_formatters(self):
        """Set up tag formatting dispatch table."""
        self._tag_formatters = {
            # Text formatting
            "strong": self._format_bold,
            "b": self._format_bold,
            "em": self._format_italic,
            "i": self._format_italic,
            "u": self._format_underline,
            "del": self._format_strikethrough,
            "s": self._format_strikethrough,
            "code": self._format_code,
            # Block elements
            "pre": self._format_pre,
            "blockquote": self._format_blockquote,
            "hr": self._format_hr,
            "p": self._format_paragraph,
            # Links and media
            "a": self._format_link,
            "img": self._format_image,
            # Lists
            "li": self._format_list_item,
            "ul": self._format_list_container,
            "ol": self._format_list_container,
            # Tables
            "table": self._format_table,
            "thead": self._format_table_element,
            "tbody": self._format_table_element,
            "td": self._format_table_element,
            "th": self._format_table_element,
            "tr": self._format_table_row,
            # Other
            "br": self._format_br,
        }

    def run(self, root: etree.Element) -> None:
        """Process the element tree and convert to ANSI text.

        Args:
            root: Root of the parsed document; modified in place so that it
                becomes a ``div`` whose only content is the rendered text.
        """
        # Drop state from any previous document so earlier trees are not kept
        # alive and list numbering always starts fresh.
        self.parent_map.clear()
        self.list_counters.clear()

        # First pass: set up parent relationships
        self._set_parent_relationships(root)
        text = self._process_element(root)

        # Replace the root with a single text node
        root.clear()
        root.text = text
        root.tag = "div"  # Use a neutral container

    def _set_parent_relationships(
        self, elem: etree.Element, parent: Optional[etree.Element] = None
    ) -> None:
        """Set parent relationships for ``elem`` and all of its descendants.

        Args:
            elem: Element whose subtree is recorded.
            parent: Parent of ``elem`` itself, if it has one.
        """
        if parent is not None:
            self.parent_map[elem] = parent
        # Iterative walk: avoids recursion limits on deeply nested documents.
        for ancestor in elem.iter():
            for child in ancestor:
                self.parent_map[child] = ancestor

    def _process_element(self, elem: etree.Element) -> str:
        """Process a single element and its children.

        Returns:
            The formatted element followed by its tail text.
        """
        tag = elem.tag.lower()
        # Children are rendered first so formatters receive finished content.
        child_text = "".join(self._process_element(child) for child in elem)
        content = (elem.text or "") + child_text
        return self._format_by_tag(tag, elem, content) + (elem.tail or "")

    def _format_by_tag(self, tag: str, elem: etree.Element, content: str) -> str:
        """Format content based on HTML tag."""
        # Check for headers first (h1-h6)
        if self._is_header_tag(tag):
            level = int(tag[1])
            return self.renderer.header(wrap(content), level)

        # Use dispatch table for known tags
        formatter = self._tag_formatters.get(tag)
        if formatter:
            return formatter(elem, content)

        # Default: return wrapped content
        return wrap(content)

    # Text formatting methods
    def _format_bold(self, elem: etree.Element, content: str) -> str:
        """Format bold element."""
        return self.renderer.bold(wrap(content))

    def _format_italic(self, elem: etree.Element, content: str) -> str:
        """Format italic element."""
        return self.renderer.italic(wrap(content))

    def _format_underline(self, elem: etree.Element, content: str) -> str:
        """Format underlined element."""
        return self.renderer.underline(wrap(content))

    def _format_strikethrough(self, elem: etree.Element, content: str) -> str:
        """Format strikethrough element."""
        return self.renderer.strikethrough(wrap(content))

    def _format_code(self, elem: etree.Element, content: str) -> str:
        """Format code - inline or block depending on parent."""
        parent = self.parent_map.get(elem)
        if parent is not None and parent.tag.lower() == "pre":
            return content  # Handled by <pre> case
        return self.renderer.code_inline(content)

    # Block element methods
    def _format_pre(self, elem: etree.Element, content: str) -> str:
        """Format preformatted element as a code block."""
        return self._format_code_block(elem, wrap(content))

    def _format_blockquote(self, elem: etree.Element, content: str) -> str:
        """Format blockquote element."""
        return self.renderer.blockquote(wrap(content))

    def _format_hr(self, elem: etree.Element, content: str) -> str:
        """Format horizontal rule element; its content is ignored."""
        return self.renderer.horizontal_rule()

    def _format_paragraph(self, elem: etree.Element, content: str) -> str:
        """Format paragraph element; whitespace-only paragraphs render as ""."""
        return f"{wrap(content)}\n" if content.strip() else ""

    # List methods
    def _format_list_container(self, elem: etree.Element, content: str) -> str:
        """Format list container (ul/ol); items are already formatted."""
        return wrap(content)

    def _format_list_item(self, elem: etree.Element, content: str) -> str:
        """Format list item element, determining if it's ordered or unordered."""
        parent = self.parent_map.get(elem)
        if parent is not None and parent.tag.lower() == "ol":
            # Ordered list - track the index per parent list
            index = self.list_counters.get(parent, 0) + 1
            self.list_counters[parent] = index
            return self.renderer.list_item(wrap(content), ordered=True, index=index)

        # Unordered list, unknown parent, or no parent at all
        return self.renderer.list_item(wrap(content), ordered=False)

    # Table methods
    def _format_table_element(self, elem: etree.Element, content: str) -> str:
        """Format table section or cell; layout is handled by the table itself."""
        return wrap(content)

    def _format_table_row(self, elem: etree.Element, content: str) -> str:
        """Format table row; layout is handled by the table itself."""
        return wrap(content)

    # Link and media methods
    def _format_link(self, elem: etree.Element, content: str) -> str:
        """Format link element as its text followed by the target URL.

        The dispatch table maps "a" to this method, so it must exist for the
        processor to be constructed. The URL is omitted when there is none or
        when the link text already is the URL.
        NOTE(review): the "text (url)" layout mirrors the image format used
        by the renderer - confirm it is the intended presentation.
        """
        text = wrap(content)
        href = elem.get("href", "")
        if not href or href == content.strip():
            return text
        return f"{text} ({href})"

    def _format_image(self, elem: etree.Element, content: str) -> str:
        """Format image element."""
        alt = elem.get("alt", "")
        src = elem.get("src", "")
        title = elem.get("title", "")
        return self.renderer.image(alt, src, title)

    # Other methods
    def _format_br(self, elem: etree.Element, content: str) -> str:
        """Format line break element."""
        return "\n"

    def _is_header_tag(self, tag: str) -> bool:
        """Check if tag is a header tag (h1-h6)."""
        return tag in self._HEADER_TAGS

    def _format_code_block(self, elem: etree.Element, content: str) -> str:
        """Format code block element.

        Args:
            elem: The ``pre`` element.
            content: Fallback text used when ``elem`` has no ``code`` child.
        """
        code_elem = elem.find("code")
        if code_elem is None:
            return self.renderer.code_block(content)

        language = code_elem.get("class", "").replace("language-", "")
        # Get the text content - ElementTree automatically unescapes HTML entities
        return self.renderer.code_block(code_elem.text or "", language)

    def _format_table(self, elem: etree.Element, content: str) -> str:
        """Format entire table element with proper column alignment.

        Cells are re-rendered from the tree (the pre-formatted ``content`` is
        not used) and collected in document order. If any row contains a
        ``th`` cell the first row is rendered as the header.
        """
        rows = []
        has_header = False

        for row_elem in elem.findall(".//tr"):
            cells = []
            row_has_th = False

            # iter() walks in document order, so rows mixing td and th cells
            # keep their original column order.
            for cell_elem in row_elem.iter():
                if cell_elem.tag not in ("td", "th"):
                    continue
                cell_content = self._process_element(cell_elem)
                # Clean up cell content - remove extra whitespace and newlines
                cells.append(cell_content.strip().replace("\n", " "))
                if cell_elem.tag == "th":
                    row_has_th = True

            if cells:  # Only add non-empty rows
                rows.append(cells)
                if row_has_th:
                    has_header = True

        if not rows:
            return ""
        return self.renderer.format_table(rows, header_row=has_header)
class ANSIExtension(Extension):
    """Main Python-Markdown extension that provides ANSI terminal output.

    It builds one ``ANSIRenderer`` for the given theme and wires the three
    processors of this module into a Markdown instance.
    """

    def __init__(self, theme: Optional[Theme] = None, **kwargs):
        """Create the extension.

        Args:
            theme: Theme handed to the renderer; None selects its default.
            **kwargs: Passed through to the base ``Extension`` initializer.
        """
        renderer = ANSIRenderer(theme)
        self.config = {"renderer": [renderer, "ANSI renderer instance"]}
        super().__init__(**kwargs)

    def extendMarkdown(self, md: markdown.Markdown) -> None:
        """Register the ANSI processors.

        Args:
            md: The Markdown instance being extended.
        """
        renderer = self.getConfig("renderer")

        # Fenced code is captured by a preprocessor, i.e. before markdown
        # splits the text into blocks.
        md.preprocessors.register(FencedCodePreprocessor(md), "fenced_code_block", 25)

        # The tree processor converts the HTML element tree to ANSI text.
        md.treeprocessors.register(ANSITreeProcessor(md, renderer), "ansi", 0)

        # The postprocessor swaps code block markers for rendered code once
        # the tree processors are done.
        md.postprocessors.register(
            CodeBlockPostprocessor(md, renderer), "code_blocks", 15
        )
# Convenience functions
def markdown_to_ansi(text: str, theme: Optional[Theme] = None, **kwargs) -> str:
    """Convert markdown text to ANSI formatted text.

    Args:
        text: Markdown text to convert
        theme: Theme instance to use for colors. If None, uses default theme.
        **kwargs: Additional arguments forwarded to the ``ANSIMarkdown``
            constructor

    Returns:
        ANSI formatted text suitable for terminal display
    """
    return ANSIMarkdown(theme=theme, **kwargs).convert(text)
class ANSIMarkdown(markdown.Markdown):
    """Markdown processor that converts markdown to ANSI formatted text.

    This is a convenience class that can be used in place of a
    markdown.Markdown instance to render markdown to ANSI formatted text
    suitable for terminal display.
    """

    def __init__(self, theme: Optional[Theme] = None, **kwargs):
        """Create a Markdown instance with the ANSI and tables extensions.

        Args:
            theme: Theme instance to use for colors. If None, uses default theme.
            **kwargs: Additional arguments for ``markdown.Markdown``. An
                ``extensions`` entry is appended after the built-in ones
                instead of clashing with them.
        """
        # Passing ``extensions`` through **kwargs used to raise TypeError
        # (duplicate keyword argument); merge the caller's list instead.
        extra_extensions = kwargs.pop("extensions", None) or []
        super().__init__(
            extensions=[ANSIExtension(theme), "tables", *extra_extensions],
            **kwargs,
        )