Initial commit

2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/init.py
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/init.py
@@ -0,0 +1,13 @@
+"""
+Document renderers for various output formats.
+"""
+
+from edgar.documents.renderers.markdown import MarkdownRenderer
+from edgar.documents.renderers.text import TextRenderer
+from edgar.documents.renderers.fast_table import FastTableRenderer
+
+# Names exported by the renderers package.
+__all__ = [
+    "MarkdownRenderer",
+    "TextRenderer",
+    "FastTableRenderer",
+]
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/init.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/init.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/fast_table.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/fast_table.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/markdown.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/markdown.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/text.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/pycache/text.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/fast_table.py
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/fast_table.py
@@ -0,0 +1,669 @@
+"""
+Fast table renderer for edgar.documents - optimized for performance.
+
+This module provides a high-performance alternative to Rich table rendering
+while maintaining professional output quality and readability.
+
+Performance target: ~32x faster than Rich rendering (0.2ms vs 6.5ms per table)
+"""
+
+from dataclasses import dataclass
+from typing import List, Dict, Optional, Union, Tuple
+from enum import Enum
+
+
+class Alignment(Enum):
+    """Horizontal placement of text inside a table column."""
+
+    LEFT = "left"
+    RIGHT = "right"
+    CENTER = "center"
+
+
+@dataclass
+class ColumnConfig:
+    """Per-column layout settings: alignment, width bounds and padding."""
+
+    # Text placement inside the column.
+    alignment: Alignment = Alignment.LEFT
+    # Smallest width the column may take.
+    min_width: int = 8
+    # Largest width the column may take; None means no limit is recorded here.
+    max_width: Optional[int] = None
+    # Padding amount for the column.
+    padding: int = 1
+
+
+@dataclass
+class TableStyle:
+    """
+    Characters and size limits used when drawing a table.
+
+    Three presets are offered as alternate constructors:
+    pipe_table(), minimal() and simple().
+    """
+    # Placed before, between and after cells; an empty string means no borders.
+    border_char: str = "|"
+    # Repeated to draw the rule under the header; an empty string means no rule.
+    header_separator: str = "-"
+    corner_char: str = "+"
+    # Number of spaces added on each side of a cell's text.
+    padding: int = 1
+    # Lower and upper bound for a column's width (padding included).
+    min_col_width: int = 8
+    max_col_width: int = 50
+
+    @classmethod
+    def pipe_table(cls) -> 'TableStyle':
+        """Build the Markdown-compatible pipe preset."""
+        preset = {
+            "border_char": "|",
+            "header_separator": "-",
+            "corner_char": "|",
+            "padding": 1,
+            "min_col_width": 8,
+            "max_col_width": 50,
+        }
+        return cls(**preset)
+
+    @classmethod
+    def minimal(cls) -> 'TableStyle':
+        """Build the spacing-only preset: no borders and no header rule."""
+        preset = {
+            "border_char": "",
+            "header_separator": "",
+            "corner_char": "",
+            "padding": 2,
+            "min_col_width": 6,
+            "max_col_width": 40,
+        }
+        return cls(**preset)
+
+    @classmethod
+    def simple(cls) -> 'TableStyle':
+        """
+        Build the preset modelled on Rich's box.SIMPLE.
+
+        The result has no outer border, no column separators and no
+        corners; columns are separated by spaces and a single Unicode
+        horizontal line is drawn under the header.
+        """
+        preset = {
+            "border_char": "",           # no pipes or borders
+            "header_separator": "─",     # Unicode horizontal line
+            "corner_char": "",           # no corners
+            "padding": 2,                # wider than pipe_table's 1
+            "min_col_width": 6,          # lower than pipe_table's 8
+            "max_col_width": 60,         # higher than pipe_table's 50
+        }
+        return cls(**preset)
+
+
+class FastTableRenderer:
+    """
+    High-performance table renderer optimized for speed.
+    
+    Features:
+    - 30x+ faster than Rich table rendering
+    - Professional, readable output
+    - Configurable alignment and styling
+    - Handles complex SEC filing table structures
+    - Markdown-compatible output
+    - Memory efficient
+    """
+    
+    def __init__(self, style: Optional[TableStyle] = None):
+        """
+        Create a renderer.
+
+        Args:
+            style: Table style to draw with. A falsy value (including the
+                default None) selects TableStyle.pipe_table().
+        """
+        # Cache dictionary for format strings; it starts out empty.
+        self._format_cache = {}
+        self.style = style or TableStyle.pipe_table()
+    
+    def render_table_node(self, table_node) -> str:
+        """
+        Render a TableNode as text, expanding colspan/rowspan cells first.
+
+        Args:
+            table_node: TableNode instance from edgar.documents
+
+        Returns:
+            The formatted table, preceded by the node's caption and a
+            newline when the node has a non-empty caption.
+        """
+        from edgar.documents.utils.table_matrix import TableMatrix
+
+        # The matrix expands each cell across its full colspan/rowspan so
+        # that every row can be read back as a flat list of cells.
+        grid = TableMatrix()
+        grid.build_from_rows(table_node.headers, table_node.rows)
+
+        def texts_of(index):
+            # Stripped text of each cell in the expanded row; empty slots become ''.
+            return [cell.text().strip() if cell else ''
+                    for cell in grid.get_expanded_row(index)]
+
+        # Header rows occupy the first indices of the matrix, data rows the rest.
+        header_count = len(table_node.headers) if table_node.headers else 0
+        headers = [texts_of(i) for i in range(header_count)]
+        rows = [texts_of(i) for i in range(header_count, grid.row_count)]
+
+        rendered = self.render_table_data(headers, rows)
+
+        # Prepend the caption when there is one.
+        caption = getattr(table_node, 'caption', None)
+        if caption:
+            return f"{caption}\n{rendered}"
+        return rendered
+    
+    def render_table_data(self, headers: List[List[str]], rows: List[List[str]]) -> str:
+        """
+        Render header rows and data rows as one formatted table.
+
+        The pipeline is: drop spacing-only columns, merge related adjacent
+        columns (for example a '$' column with its amount column), compute
+        column widths and alignments, then build the text.
+
+        Args:
+            headers: List of header rows (several rows for multi-row headers)
+            rows: List of data rows
+
+        Returns:
+            Formatted table string; "" when there is nothing to render.
+        """
+        if not headers and not rows:
+            return ""
+
+        combined = headers + rows if headers else rows
+        if not combined:
+            return ""
+
+        column_count = max(len(r) for r in combined)
+        if column_count == 0:
+            return ""
+
+        # Keep only the columns that carry real content.
+        keep = self._identify_meaningful_columns(combined, column_count)
+        if not keep:
+            return ""
+
+        header_part = [self._filter_row_to_columns(r, keep) for r in headers] if headers else []
+        body_part = [self._filter_row_to_columns(r, keep) for r in rows]
+
+        # Merge related columns across headers and data together, using the
+        # first filtered row as the reference row, then split them apart again.
+        stacked = header_part + body_part
+        if stacked:
+            _, merged = self._merge_related_columns(stacked[0], stacked)
+            if header_part:
+                header_total = len(header_part)
+                header_part = merged[:header_total]
+                body_part = merged[header_total:]
+            else:
+                body_part = merged
+
+        # Widths and alignments are derived from the filtered, merged rows.
+        final_rows = header_part + body_part if header_part else body_part
+        final_cols = max(len(r) for r in final_rows) if final_rows else 0
+        widths = self._calculate_column_widths(final_rows, final_cols)
+        aligns = self._detect_alignments(final_rows, final_cols)
+
+        return self._build_table(header_part, body_part, widths, aligns)
+    
+    def _combine_headers(self, headers: List[List[str]]) -> List[str]:
+        """
+        Collapse several header rows into a single header row.
+
+        For each column the non-blank values of all header rows are
+        collected; a date-like value wins, otherwise the first value that
+        is not a generic label such as "Year Ended" is used.
+
+        Args:
+            headers: Header rows, top to bottom.
+
+        Returns:
+            One label per column ("" where no row has a value). A single
+            header row is returned unchanged; no rows gives [].
+        """
+        if not headers:
+            return []
+        if len(headers) == 1:
+            return headers[0]
+
+        generic_labels = {'year ended', 'years ended', 'period ended'}
+        width = max(len(header_row) for header_row in headers)
+
+        merged = []
+        for col in range(width):
+            # Non-blank, stripped values found in this column.
+            candidates = [header_row[col].strip() for header_row in headers
+                          if col < len(header_row) and header_row[col].strip()]
+            if not candidates:
+                merged.append("")
+                continue
+
+            # First preference: the first value that looks like a date.
+            dated = next((c for c in candidates if self._looks_like_date(c)), None)
+            if dated is not None:
+                merged.append(dated)
+            elif len(candidates) == 1:
+                merged.append(candidates[0])
+            else:
+                # Otherwise skip generic labels if something more specific exists.
+                specific = [c for c in candidates if c.lower() not in generic_labels]
+                merged.append(specific[0] if specific else candidates[0])
+
+        return merged
+    
+    def _looks_like_date(self, text: str) -> bool:
+        """
+        Quick date detection for header processing.
+
+        A text counts as date-like when it contains either
+        - a standalone four-digit year starting with 19 or 20, or
+        - a full English month name as a whole word, together with a digit.
+
+        Matching a bare "20"/"19" substring would also flag plain amounts
+        such as "1,200" or "120 units", so the year must be exactly four
+        digits and must not be part of a longer digit run.
+
+        Args:
+            text: Header cell text.
+
+        Returns:
+            True if the text looks like a date or period label.
+        """
+        import re
+
+        if not text or len(text) < 4:
+            return False
+
+        normalized = text.lower().replace('\n', ' ').strip()
+
+        # Four-digit year not embedded in a longer number ("FY2023" still matches).
+        if re.search(r'(?<!\d)(?:19|20)\d{2}(?!\d)', normalized):
+            return True
+
+        # Whole-word month name; letters on either side would mean another word.
+        month = re.search(
+            r'(?<![a-z])(?:january|february|march|april|may|june|july|august|'
+            r'september|october|november|december)(?![a-z])',
+            normalized,
+        )
+        return bool(month) and any(ch.isdigit() for ch in normalized)
+    
+    def _identify_meaningful_columns(self, all_rows: List[List[str]], max_cols: int) -> List[int]:
+        """
+        Pick the columns that hold real content rather than spacing.
+
+        Each cell earns points by the length of its stripped text: 3 for
+        three or more characters, 2 for two alphanumeric characters, 1 for
+        a single alphanumeric character or '$', else 0. A column is kept
+        when its average is at least 0.5 or its total is at least 5. At
+        most 8 columns are kept, best average first.
+
+        Args:
+            all_rows: Header and data rows.
+            max_cols: Number of columns to examine.
+
+        Returns:
+            Kept column indices in ascending order.
+        """
+        def cell_points(value) -> int:
+            # Points for one cell, by the length and kind of its stripped text.
+            text = str(value).strip()
+            size = len(text)
+            if size >= 3:
+                return 3
+            if size == 2 and text.isalnum():
+                return 2
+            if size == 1 and (text.isalnum() or text == '$'):
+                return 1
+            return 0
+
+        scored = []
+        for col in range(max_cols):
+            # Only rows long enough to have this column take part.
+            present = [row[col] for row in all_rows if col < len(row)]
+            total = sum(cell_points(value) for value in present)
+            scored.append((col, total / max(len(present), 1), total))
+
+        # Best average first; the sort is stable, so ties keep column order.
+        ranked = sorted(scored, key=lambda entry: entry[1], reverse=True)
+
+        chosen = []
+        for col, average, total in ranked:
+            if average >= 0.5 or total >= 5:
+                chosen.append(col)
+            # Cap the number of columns for readability.
+            if len(chosen) >= 8:
+                break
+
+        return sorted(chosen)
+    
+    def _filter_row_to_columns(self, row: List[str], column_indices: List[int]) -> List[str]:
+        """
+        Project a row onto the given column indices.
+
+        Args:
+            row: Original row data
+            column_indices: Indices of the columns to keep, in output order
+
+        Returns:
+            One value per requested index, with "" for indices beyond the
+            end of the row. An empty row yields [].
+        """
+        if not row:
+            return []
+
+        size = len(row)
+        return [row[idx] if idx < size else "" for idx in column_indices]
+    
+    def _merge_related_columns(self, headers: List[str], rows: List[List[str]]) -> tuple:
+        """
+        Merge adjacent columns that belong together (e.g. '$' and its amount).
+
+        Every adjacent pair of columns is tested with _should_merge_columns;
+        accepted pairs are merged in copies of the inputs, so the arguments
+        themselves are not modified.
+
+        Args:
+            headers: Reference header row.
+            rows: Rows to merge.
+
+        Returns:
+            Tuple of (merged_headers, merged_rows). When rows is empty or
+            contains only empty rows, the inputs are returned as they are.
+        """
+        if not rows or not any(rows):
+            return headers, rows
+
+        widest = max(len(r) for r in [headers] + rows if r)
+
+        # Adjacent pairs (i, i + 1) that should become one column.
+        pairs = [(i, i + 1) for i in range(widest - 1)
+                 if self._should_merge_columns(headers, rows, i, i + 1)]
+
+        new_headers = headers[:] if headers else []
+        new_rows = [r[:] for r in rows]
+
+        # Work from the rightmost pair so earlier indices stay valid.
+        for left, right in reversed(pairs):
+            if new_headers and left < len(new_headers) and right < len(new_headers):
+                joined = f"{new_headers[left].strip()} {new_headers[right].strip()}".strip()
+                new_headers[left] = joined
+                del new_headers[right]
+
+            for r in new_rows:
+                # right is always left + 1, so this also guarantees left is in range.
+                if right >= len(r):
+                    continue
+
+                first = str(r[left]).strip()
+                second = str(r[right]).strip()
+
+                if first == '$' and second:
+                    # Currency symbol attaches directly to the amount.
+                    cell = f"${second}"
+                elif first and second:
+                    cell = f"{first} {second}"
+                else:
+                    cell = first or second
+
+                r[left] = cell
+                del r[right]
+
+        return new_headers, new_rows
+    
+    def _should_merge_columns(self, headers: List[str], rows: List[List[str]], left_idx: int, right_idx: int) -> bool:
+        """
+        Decide whether two adjacent columns should be merged.
+
+        Only rows that contain both columns are counted. The columns are
+        merged when, among those rows,
+        - at least 50% have '$' on the left and a plain number (digits,
+          commas, dots) on the right, or
+        - at least 70% have an empty left cell and a non-empty right cell.
+
+        Args:
+            headers: Header row (not consulted by the decision).
+            rows: Rows to inspect.
+            left_idx: Index of the left column.
+            right_idx: Index of the right column.
+
+        Returns:
+            True if columns should be merged
+        """
+        paired = 0
+        currency_hits = 0
+        blank_left_hits = 0
+
+        for row in rows:
+            if left_idx >= len(row) or right_idx >= len(row):
+                continue
+            paired += 1
+
+            left = str(row[left_idx]).strip()
+            right = str(row[right_idx]).strip()
+            if not right:
+                # Both patterns need content on the right.
+                continue
+
+            if left == '$' and right.replace(',', '').replace('.', '').isdigit():
+                currency_hits += 1
+            elif not left:
+                blank_left_hits += 1
+
+        if paired == 0:
+            return False
+
+        return currency_hits / paired >= 0.5 or blank_left_hits / paired >= 0.7
+    
+    def _calculate_column_widths(self, all_rows: List[List[str]], max_cols: int) -> List[int]:
+        """
+        Work out a width for each column from its content.
+
+        A column is as wide as its longest line of text plus padding on
+        both sides, never narrower than the style's min_col_width, and
+        content wider than max_col_width is counted as max_col_width.
+
+        Args:
+            all_rows: Header and data rows.
+            max_cols: Number of columns.
+
+        Returns:
+            One width per column.
+        """
+        style = self.style
+        both_sides = style.padding * 2
+        cap = style.max_col_width
+        widths = [style.min_col_width] * max_cols
+
+        for row in all_rows:
+            for idx, cell in enumerate(row[:max_cols]):
+                text = str(cell) if cell else ""
+                # Multi-line cells are measured by their longest line.
+                longest = max(map(len, text.split('\n')))
+                needed = min(longest + both_sides, cap)
+                if needed > widths[idx]:
+                    widths[idx] = needed
+
+        return widths
+    
+    def _detect_alignments(self, all_rows: List[List[str]], max_cols: int) -> List[Alignment]:
+        """
+        Choose an alignment for each column from its content.
+
+        The first row is left out of the sample whenever there is more
+        than one row. A column is right-aligned when at least 70% of its
+        non-blank sampled cells look numeric; otherwise it is left-aligned.
+
+        Args:
+            all_rows: Header and data rows.
+            max_cols: Number of columns.
+
+        Returns:
+            One Alignment per column.
+        """
+        # The sample is the same for every column, so build it once.
+        sample = all_rows[1:] if len(all_rows) > 1 else all_rows
+
+        result = []
+        for col in range(max_cols):
+            filled = [row[col].strip() for row in sample
+                      if col < len(row) and row[col].strip()]
+            numeric = sum(1 for value in filled if self._looks_numeric(value))
+
+            if filled and numeric / len(filled) >= 0.7:
+                result.append(Alignment.RIGHT)
+            else:
+                result.append(Alignment.LEFT)
+
+        return result
+    
+    def _looks_numeric(self, text: str) -> bool:
+        """
+        Check if text content looks numeric.
+
+        Thousands separators, '$', '%' and parentheses are removed before
+        the check, so values such as "$1,234.50", "45%", "(1,234)" and
+        "(12.5%)" all count as numeric.
+
+        Args:
+            text: Cell text.
+
+        Returns:
+            True if the cleaned text parses as a finite number.
+        """
+        if not text:
+            return False
+
+        # Strip formatting in one place. Parentheses (accounting-style
+        # negatives) are removed together with '%', so "(12.5%)" is handled
+        # the same way as "(12.5)".
+        cleaned = (text.replace(',', '').replace('$', '').replace('%', '')
+                   .replace('(', '').replace(')', '').strip())
+
+        try:
+            value = float(cleaned)
+        except ValueError:
+            return False
+
+        # float() also parses the words "nan", "inf" and "infinity"; those
+        # are text, not table numbers. NaN is the only value unequal to itself.
+        return value == value and abs(value) != float('inf')
+    
+    def _build_table(self, headers: List[List[str]], rows: List[List[str]],
+                    col_widths: List[int], alignments: List[Alignment]) -> str:
+        """
+        Assemble the final table text.
+
+        Header rows are rendered first (multi-line cells expand to several
+        lines), followed by a separator line when the style defines one,
+        then the data rows. Rows whose cells are all blank are skipped.
+
+        Args:
+            headers: List of header rows (can be multiple rows for multi-row headers)
+            rows: List of data rows
+            col_widths: Column widths
+            alignments: Column alignments
+
+        Returns:
+            The table lines joined with newlines.
+        """
+        output = []
+
+        if headers:
+            for header_cells in headers:
+                if not any(cell.strip() for cell in header_cells):
+                    continue
+                output.extend(self._format_multiline_row(header_cells, col_widths, alignments))
+
+            # One separator after all header rows.
+            if self.style.header_separator:
+                output.append(self._create_separator_line(col_widths))
+
+        # Data rows are rendered one line each.
+        output.extend(
+            self._format_row(data_cells, col_widths, alignments)
+            for data_cells in rows
+            if any(cell.strip() for cell in data_cells)
+        )
+
+        return '\n'.join(output)
+    
+    def _format_row(self, row: List[str], col_widths: List[int], 
+                   alignments: List[Alignment]) -> str:
+        """
+        Format a single row with proper alignment and padding.
+
+        Only the first line of a multi-line cell is used. Text that does
+        not fit the column is shortened and ends in "..."; when the column
+        is too narrow to hold the ellipsis the text is cut hard instead,
+        so a cell never exceeds its column width.
+
+        Args:
+            row: Cell values; missing trailing cells are rendered empty.
+            col_widths: Width of each column, padding included.
+            alignments: Alignment of each column; columns without an
+                entry are left-aligned.
+
+        Returns:
+            The formatted line.
+        """
+        padding = self.style.padding
+        pad = ' ' * padding
+        border = self.style.border_char
+        cells = []
+
+        for col_idx, width in enumerate(col_widths):
+            content = str(row[col_idx]) if col_idx < len(row) else ""
+
+            # Handle multi-line content (take first line only for table)
+            if '\n' in content:
+                content = content.split('\n')[0]
+
+            content = content.strip()
+
+            # Room left for text once padding is taken off both sides.
+            available_width = width - (padding * 2)
+
+            if len(content) > available_width:
+                if available_width >= 3:
+                    content = content[:available_width - 3] + "..."
+                else:
+                    # No room for "..."; a negative slice end here would
+                    # keep most of the text and overflow the column.
+                    content = content[:max(available_width, 0)]
+
+            alignment = alignments[col_idx] if col_idx < len(alignments) else Alignment.LEFT
+
+            if alignment == Alignment.RIGHT:
+                aligned_content = content.rjust(available_width)
+            elif alignment == Alignment.CENTER:
+                aligned_content = content.center(available_width)
+            else:  # LEFT
+                aligned_content = content.ljust(available_width)
+
+            cells.append(pad + aligned_content + pad)
+
+        # Bordered styles wrap and separate cells with the border character;
+        # borderless styles separate cells with two spaces.
+        if border:
+            return border + border.join(cells) + border
+        else:
+            return '  '.join(cells)
+    
+    def _format_multiline_row(self, row: List[str], col_widths: List[int],
+                              alignments: List[Alignment]) -> List[str]:
+        """
+        Format a row whose cells may span several lines of text.
+
+        Each cell is split on newline characters. The row is then rendered
+        once per line number, with cells that have fewer lines contributing
+        an empty string.
+
+        Args:
+            row: Cell values.
+            col_widths: Column widths.
+            alignments: Column alignments.
+
+        Returns:
+            A list of formatted lines, at least one.
+        """
+        # Lines of every cell; an empty cell counts as one empty line.
+        split_cells = [cell.split('\n') if cell else [''] for cell in row]
+        height = max([1] + [len(pieces) for pieces in split_cells])
+
+        rendered = []
+        for line_no in range(height):
+            layer = [pieces[line_no] if line_no < len(pieces) else ''
+                     for pieces in split_cells]
+            rendered.append(self._format_row(layer, col_widths, alignments))
+
+        return rendered
+
+    def _create_separator_line(self, col_widths: List[int]) -> str:
+        """
+        Build the line drawn under the header.
+
+        Bordered styles get one run of the separator character per column,
+        joined and wrapped with the border character: |-------|-------|
+        Borderless styles get a single space followed by one run spanning
+        all columns plus the two-space gaps between them.
+        Styles without a separator character get "".
+
+        Args:
+            col_widths: Column widths.
+
+        Returns:
+            The separator line.
+        """
+        fill = self.style.header_separator
+        edge = self.style.border_char
+
+        if not fill:
+            return ""
+
+        if edge:
+            return edge + edge.join(fill * width for width in col_widths) + edge
+
+        # Column widths plus a 2-character gap between neighbouring columns.
+        span = sum(col_widths) + 2 * (len(col_widths) - 1)
+        return " " + fill * span
+
+
+# Factory functions for easy usage
+def create_fast_renderer(style: str = "pipe") -> FastTableRenderer:
+    """
+    Create a FastTableRenderer with a predefined style.
+
+    Args:
+        style: Style name: "pipe", "minimal" or "simple". Any other
+            value falls back to the pipe style.
+
+    Returns:
+        Configured FastTableRenderer instance
+    """
+    if style == "minimal":
+        return FastTableRenderer(TableStyle.minimal())
+    if style == "simple":
+        # TableStyle.simple() exists as a preset, so make it selectable by name.
+        return FastTableRenderer(TableStyle.simple())
+    # Default to pipe
+    return FastTableRenderer(TableStyle.pipe_table())
+
+
+def render_table_fast(table_node, style: str = "pipe") -> str:
+    """
+    Render a table node in one call.
+
+    Builds a renderer with create_fast_renderer(style) and uses it to
+    render the node.
+
+    Args:
+        table_node: TableNode instance
+        style: Style name passed on to create_fast_renderer
+
+    Returns:
+        Formatted table string
+    """
+    return create_fast_renderer(style).render_table_node(table_node)
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/markdown.py
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/markdown.py
@@ -0,0 +1,613 @@
+"""
+Markdown renderer for parsed documents.
+"""
+
+from typing import List, Optional, Dict, Set
+
+from edgar.documents.document import Document
+from edgar.documents.nodes import Node, TextNode, HeadingNode, ParagraphNode, ListNode, ListItemNode
+from edgar.documents.table_nodes import TableNode
+
+
+class MarkdownRenderer:
+    """
+    Renders parsed documents to Markdown format.
+    
+    Features:
+    - Preserves document structure
+    - Handles tables with proper formatting
+    - Supports nested lists
+    - Includes metadata annotations
+    - Configurable output options
+    """
+    
+    def __init__(self,
+                 include_metadata: bool = False,
+                 include_toc: bool = False,
+                 max_heading_level: int = 6,
+                 table_format: str = 'pipe',
+                 wrap_width: Optional[int] = None):
+        """
+        Create a renderer with the given output options.
+
+        Args:
+            include_metadata: Emit the document metadata header and per-node
+                metadata comments
+            include_toc: Emit a table of contents built from the headings
+            max_heading_level: Deepest heading level emitted; deeper headings
+                are clamped to this level
+            table_format: 'pipe' or 'grid'; any other value selects the
+                simple format
+            wrap_width: Column at which paragraph text is wrapped, or None
+                to disable wrapping
+        """
+        # Per-render bookkeeping
+        self._toc_entries: List[tuple] = []
+        self._rendered_ids: Set[str] = set()
+        self._list_depth = 0
+        self._in_table = False
+
+        # Output options
+        self.wrap_width = wrap_width
+        self.table_format = table_format
+        self.max_heading_level = max_heading_level
+        self.include_toc = include_toc
+        self.include_metadata = include_metadata
+    
+    def render(self, document: Document) -> str:
+        """
+        Render document to Markdown.
+
+        The output consists of, in order: the metadata header (when
+        include_metadata is set), the table of contents (when include_toc
+        is set and at least one heading was recorded) and the rendered
+        content of document.root, each separated by a blank line.
+
+        Args:
+            document: Document to render
+
+        Returns:
+            Markdown formatted text with surrounding whitespace stripped
+        """
+        self._reset_state()
+
+        parts = []
+
+        # Add metadata header if requested
+        if self.include_metadata:
+            parts.append(self._render_metadata(document))
+            parts.append("")
+
+        # Render the content first: headings register their TOC entries
+        # while they are rendered, so the TOC can only be built afterwards.
+        content = self._render_node(document.root)
+
+        # Insert the TOC directly rather than substituting a placeholder.
+        # A placeholder would be left behind when no heading was found and
+        # would also be replaced wherever the same text occurs in the content.
+        if self.include_toc and self._toc_entries:
+            parts.append(self._generate_toc())
+            parts.append("")
+
+        parts.append(content)
+
+        return "\n".join(parts).strip()
+    
+    def render_node(self, node: Node) -> str:
+        """
+        Render one node (and whatever it contains) to Markdown.
+
+        Renderer state is reset before rendering starts.
+
+        Args:
+            node: Node to render
+
+        Returns:
+            Markdown formatted text
+        """
+        self._reset_state()
+        rendered = self._render_node(node)
+        return rendered
+    
+    def _reset_state(self):
+        """Clear everything that is tracked while a render is in progress."""
+        self._in_table = False
+        self._list_depth = 0
+        self._rendered_ids = set()
+        self._toc_entries = []
+    
+    def _render_node(self, node: Node) -> str:
+        """
+        Render a node by dispatching to the renderer for its type.
+
+        A node whose id has already been rendered yields an empty string,
+        so a node reachable through several paths is emitted only once.
+        """
+        if node.id in self._rendered_ids:
+            return ""
+        self._rendered_ids.add(node.id)
+
+        # Checked in order; the first matching type wins
+        handlers = (
+            (HeadingNode, self._render_heading),
+            (ParagraphNode, self._render_paragraph),
+            (TextNode, self._render_text),
+            (TableNode, self._render_table),
+            (ListNode, self._render_list),
+            (ListItemNode, self._render_list_item),
+        )
+        for node_type, handler in handlers:
+            if isinstance(node, node_type):
+                return handler(node)
+
+        # Any other node type contributes only its children
+        return self._render_children(node)
+    
+    def _render_heading(self, node: HeadingNode) -> str:
+        """
+        Render a heading line followed by the heading's child content.
+
+        Headings without text produce no output. When a table of contents
+        is requested, the heading is recorded as (level, text, node id).
+        """
+        # Levels deeper than the configured maximum are clamped to it
+        level = min(node.level, self.max_heading_level)
+
+        title = node.text().strip()
+        if not title:
+            return ""
+
+        if self.include_toc:
+            self._toc_entries.append((level, title, node.id))
+
+        pieces = ["#" * level, " ", title]
+
+        # Metadata is attached to the heading line as an HTML comment
+        if self.include_metadata and node.metadata:
+            formatted = self._format_metadata(node.metadata)
+            if formatted:
+                pieces.append(f" <!-- {formatted} -->")
+
+        # Child content follows after a blank line
+        body = self._render_children(node)
+        if body:
+            pieces.append("\n\n" + body)
+
+        return "".join(pieces)
+    
+    def _render_paragraph(self, node: ParagraphNode) -> str:
+        """
+        Render a paragraph from its children.
+
+        The text is optionally wrapped at wrap_width and, when metadata
+        output is enabled, preceded by an HTML comment line holding the
+        node metadata. Paragraphs without content produce no output.
+        """
+        body = self._render_children(node).strip()
+        if not body:
+            return ""
+
+        if self.wrap_width:
+            body = self._wrap_text(body, self.wrap_width)
+
+        if not (self.include_metadata and node.metadata):
+            return body
+
+        formatted = self._format_metadata(node.metadata)
+        return f"<!-- {formatted} -->\n{body}" if formatted else body
+    
+    def _render_text(self, node: TextNode) -> str:
+        """
+        Render text node, applying inline emphasis from its style.
+
+        Emphasis markers are placed directly against the visible text and
+        leading/trailing whitespace is kept outside of them, because
+        Markdown does not recognise "** text **" as emphasis. Text that is
+        empty or whitespace-only is returned without any markers.
+
+        Args:
+            node: Text node to render
+
+        Returns:
+            Escaped text, wrapped in bold, italic or underline markup when
+            the node style asks for it (checked in that order; only the
+            first matching style is applied)
+        """
+        # Escape markdown special characters
+        text = self._escape_markdown(node.text())
+
+        style = node.style
+        core = text.strip()
+        if not style or not core:
+            # Nothing to emphasise; avoids output such as "****"
+            return text
+
+        # Preserve the surrounding whitespace, but outside of the markers
+        leading = text[:len(text) - len(text.lstrip())]
+        trailing = text[len(text.rstrip()):]
+
+        # Apply text formatting based on style
+        if style.font_weight in ['bold', '700', '800', '900']:
+            core = f"**{core}**"
+        elif style.font_style == 'italic':
+            core = f"*{core}*"
+        elif style.text_decoration == 'underline':
+            core = f"<u>{core}</u>"
+
+        return leading + core + trailing
+    
+    def _render_table(self, node: TableNode) -> str:
+        """
+        Render a table with optional caption and metadata comment.
+
+        The table body is produced by the renderer matching table_format:
+        'pipe', 'grid', or the simple format for any other value. The
+        in-table flag is set for the duration of the call.
+        """
+        self._in_table = True
+
+        lines = []
+
+        # Caption goes above the table, separated by a blank line
+        if node.caption:
+            lines.extend([f"**Table: {node.caption}**", ""])
+
+        body_renderers = {
+            'pipe': self._render_table_pipe,
+            'grid': self._render_table_grid,
+        }
+        render_body = body_renderers.get(self.table_format, self._render_table_simple)
+        lines.append(render_body(node))
+
+        # Metadata goes below the table as an HTML comment
+        if self.include_metadata and node.metadata:
+            formatted = self._format_metadata(node.metadata)
+            if formatted:
+                lines.append(f"<!-- Table metadata: {formatted} -->")
+
+        self._in_table = False
+
+        return "\n".join(lines)
+    
+    def _render_table_pipe(self, node: TableNode) -> str:
+        """
+        Render table in pipe format with proper column spanning support.
+
+        Rows are first expanded so that every colspan occupies its own
+        columns, columns without any content are dropped, and multi-row
+        headers are merged into a single header row. Data rows in which
+        every remaining cell is blank are skipped.
+
+        Cell text is sanitised before it is emitted: line breaks become
+        spaces and "|" is backslash-escaped, since either would otherwise
+        split the row or the cell and corrupt the table.
+
+        Args:
+            node: Table to render
+
+        Returns:
+            Pipe table as text, or an empty string when no column has content
+        """
+        def clean_cell(value: str) -> str:
+            # A pipe-table row must stay on one line and may only contain
+            # "|" as a column delimiter.
+            for line_break in ("\r\n", "\n", "\r"):
+                value = value.replace(line_break, " ")
+            return value.replace("|", "\\|")
+
+        def format_row(cells: List[str]) -> str:
+            return "| " + " | ".join(cells) + " |"
+
+        # Handle complex SEC filing tables with column spanning
+        expanded_headers, expanded_data_rows = self._expand_table_structure(node)
+
+        # Identify and filter to meaningful columns
+        content_columns = self._identify_content_columns(expanded_headers, expanded_data_rows)
+
+        if not content_columns:
+            return ""
+
+        rows = []
+
+        # Render headers with intelligent multi-row combination
+        if expanded_headers:
+            combined_headers = self._combine_multi_row_headers(expanded_headers)
+            filtered_headers = [
+                clean_cell(combined_headers[i]) if i < len(combined_headers) else ""
+                for i in content_columns
+            ]
+            rows.append(format_row(filtered_headers))
+
+            # Add separator
+            rows.append(format_row(["---"] * len(filtered_headers)))
+
+        # Render data rows
+        for expanded_row in expanded_data_rows:
+            filtered_row = [
+                clean_cell(expanded_row[i]) if i < len(expanded_row) else ""
+                for i in content_columns
+            ]
+
+            # Only add rows with meaningful content
+            if any(cell.strip() for cell in filtered_row):
+                rows.append(format_row(filtered_row))
+
+        return "\n".join(rows)
+    
+    def _render_table_grid(self, node: TableNode) -> str:
+        """
+        Render table in a simplified grid format.
+
+        Each header and data row becomes one line with its cells joined by
+        " | ". All lines are left-aligned to the longest one and the whole
+        table is enclosed in a "+---+" border at top and bottom.
+
+        Returns:
+            Grid text, or an empty string for a table without rows
+        """
+        header_rows = node.headers or []
+        lines = [" | ".join(cell.text() for cell in header_row) for header_row in header_rows]
+        lines += [" | ".join(cell.text() for cell in row.cells) for row in node.rows]
+
+        if not lines:
+            return ""
+
+        # Pad every line to the widest one so the right edge lines up
+        width = max(map(len, lines))
+        border = "+" + "-" * (width + 2) + "+"
+        framed = [f"| {line:<{width}} |" for line in lines]
+
+        return "\n".join([border, *framed, border])
+    
+    def _render_table_simple(self, node: TableNode) -> str:
+        """
+        Render table in simple format.
+
+        Cells of a row are joined by two spaces. When the table has both
+        headers and data rows, an empty line separates the two groups.
+        """
+        header_rows = node.headers or []
+        header_lines = ["  ".join(cell.text() for cell in header_row) for header_row in header_rows]
+
+        # Blank line between header and data, only when both are present
+        spacer = [""] if node.headers and node.rows else []
+
+        data_lines = ["  ".join(cell.text() for cell in row.cells) for row in node.rows]
+
+        return "\n".join(header_lines + spacer + data_lines)
+    
+    def _render_list(self, node: ListNode) -> str:
+        """
+        Render a list, one line group per list item.
+
+        Only ListItemNode children are rendered. The list depth is raised
+        while the items are rendered so that they can pick their marker
+        and indentation from it.
+        """
+        self._list_depth += 1
+
+        rendered_items = (
+            self._render_list_item(child)
+            for child in node.children
+            if isinstance(child, ListItemNode)
+        )
+        # Materialise before the depth is lowered again
+        items = [item_md for item_md in rendered_items if item_md]
+
+        self._list_depth -= 1
+
+        return "\n".join(items)
+    
+    def _render_list_item(self, node: ListItemNode) -> str:
+        """
+        Render list item node.
+
+        Items of an ordered parent are numbered by their position among
+        the parent's ListItemNode children; other items use a bullet that
+        cycles through '*', '-', '+' with the list depth. Continuation
+        lines of multi-line content are aligned with the first character
+        after the marker so they remain part of the item.
+
+        Args:
+            node: List item to render
+
+        Returns:
+            Indented item text including its marker
+        """
+        parent = node.parent
+
+        # Determine bullet/number
+        if parent and getattr(parent, 'ordered', False):
+            # Count only list-item siblings (the list renderer skips every
+            # other child) and match this node by identity, so that equal
+            # looking items do not share a number.
+            position = 1
+            for sibling in parent.children:
+                if sibling is node:
+                    break
+                if isinstance(sibling, ListItemNode):
+                    position += 1
+            marker = f"{position}."
+        else:
+            # Unordered list
+            markers = ['*', '-', '+']
+            marker = markers[(self._list_depth - 1) % len(markers)]
+
+        # Indentation
+        indent = "  " * (self._list_depth - 1)
+
+        # Continuation lines start under the item text: the marker width
+        # plus the space that follows it ("* " -> 2, "1. " -> 3, "10. " -> 4).
+        hanging_indent = indent + " " * (len(marker) + 1)
+
+        # Get content
+        content = self._render_children(node).strip()
+        first_line, *continuation = content.split('\n')
+
+        lines = [indent + marker + " " + first_line]
+        # Blank lines stay empty instead of carrying trailing whitespace
+        lines.extend(hanging_indent + line if line else "" for line in continuation)
+
+        return "\n".join(lines)
+    
+    def _render_children(self, node: Node) -> str:
+        """
+        Render every child of a node and join the non-empty results.
+
+        Inside a table the results are joined by a single space. Elsewhere
+        they are joined by a blank line when at least one child is a
+        heading, paragraph, table or list, and by a single space otherwise.
+        """
+        rendered = [child_md for child_md in map(self._render_node, node.children) if child_md]
+
+        # Table content is always kept on one line
+        if self._in_table:
+            return " ".join(rendered)
+
+        block_types = (HeadingNode, ParagraphNode, TableNode, ListNode)
+        has_block_child = any(isinstance(child, block_types) for child in node.children)
+        separator = "\n\n" if has_block_child else " "
+
+        return separator.join(rendered)
+    
+    def _render_metadata(self, document: Document) -> str:
+        """
+        Render the document metadata as a block delimited by "---" lines.
+
+        One "name: value" line is written for each of company, form,
+        filing_date, cik and accession_number whose value is truthy.
+        """
+        metadata = document.metadata
+        field_names = ("company", "form", "filing_date", "cik", "accession_number")
+
+        lines = ["---"]
+        for name in field_names:
+            value = getattr(metadata, name)
+            if value:
+                lines.append(f"{name}: {value}")
+        lines.append("---")
+
+        return "\n".join(lines)
+    
+    def _generate_toc(self) -> str:
+        """
+        Generate table of contents from the recorded headings.
+
+        Entries are nested by heading level relative to the shallowest
+        recorded heading, so the top-most entries always start at column
+        zero. Indenting by absolute level would push a document that only
+        uses deeper headings four or more spaces in, which Markdown
+        renders as a code block instead of a list.
+
+        Returns:
+            "## Table of Contents" followed by one linked bullet per heading
+        """
+        lines = ["## Table of Contents", ""]
+
+        # Shallowest level present; it becomes the un-indented list level
+        base_level = min((level for level, _, _ in self._toc_entries), default=1)
+
+        for level, text, _ in self._toc_entries:
+            # Create anchor link
+            anchor = self._create_anchor(text)
+
+            # Indentation based on depth below the shallowest heading
+            indent = "  " * (level - base_level)
+
+            # Add TOC entry
+            lines.append(f"{indent}- [{text}](#{anchor})")
+
+        return "\n".join(lines)
+    
+    def _create_anchor(self, text: str) -> str:
+        """
+        Derive a link anchor from heading text.
+
+        The text is lower-cased, spaces become hyphens, every character
+        other than a-z, 0-9 and "-" is dropped, runs of hyphens collapse
+        to one, and hyphens at either end are removed.
+        """
+        import re
+
+        hyphenated = text.lower().replace(' ', '-')
+        kept = re.sub(r'[^a-z0-9\-]', '', hyphenated)
+        collapsed = re.sub(r'-+', '-', kept)
+
+        return collapsed.strip('-')
+    
+    def _format_metadata(self, metadata: Dict) -> str:
+        """
+        Format metadata as space-separated "label:value" pairs.
+
+        The keys semantic_type and ix_tag are shown as "type" and "xbrl";
+        every other key is used as its own label.
+        """
+        labels = {
+            'semantic_type': 'type',
+            'section': 'section',
+            'ix_tag': 'xbrl',
+        }
+
+        return " ".join(
+            f"{labels.get(key, key)}:{value}"
+            for key, value in metadata.items()
+        )
+    
+    def _escape_markdown(self, text: str) -> str:
+        """
+        Backslash-escape Markdown special characters.
+
+        Text is returned unchanged while a table is being rendered.
+        """
+        if self._in_table:
+            return text
+
+        special = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
+
+        # Single pass: every special character gains a leading backslash
+        escapes = str.maketrans({char: '\\' + char for char in special})
+        return text.translate(escapes)
+    
+    def _wrap_text(self, text: str, width: int) -> str:
+        """Wrap text to lines of at most width characters without splitting long words."""
+        import textwrap
+
+        wrapped_lines = textwrap.wrap(text, width=width, break_long_words=False)
+        return "\n".join(wrapped_lines)
+    
+    def _expand_table_structure(self, node: TableNode) -> tuple:
+        """
+        Expand every header and data row to a common column count.
+
+        The column count is the largest sum of cell colspans found in any
+        row. Each row is then expanded to that many text cells.
+
+        Returns:
+            (expanded_headers, expanded_data_rows), each a list of rows of
+            cell text
+        """
+        header_rows = list(node.headers) if node.headers else []
+        data_rows = [row.cells for row in node.rows]
+
+        # Logical width of each row once colspans are taken into account
+        row_widths = [
+            sum(cell.colspan for cell in cells)
+            for cells in header_rows + data_rows
+        ]
+        max_columns = max([0, *row_widths])
+
+        expanded_headers = [
+            self._expand_row_to_columns(cells, max_columns) for cells in header_rows
+        ]
+        expanded_data_rows = [
+            self._expand_row_to_columns(cells, max_columns) for cells in data_rows
+        ]
+
+        return expanded_headers, expanded_data_rows
+    
+    def _expand_row_to_columns(self, cells: List, target_columns: int) -> List[str]:
+        """
+        Turn a row of cells into exactly target_columns text entries.
+
+        Each cell contributes its stripped text followed by one empty
+        entry per additional column it spans. The result is padded with
+        empty entries or truncated to the target length.
+        """
+        columns: List[str] = []
+        for cell in cells:
+            columns.append(cell.text().strip())
+            # Remaining columns covered by the colspan stay empty
+            columns.extend([""] * (cell.colspan - 1))
+
+        shortfall = target_columns - len(columns)
+        if shortfall > 0:
+            columns.extend([""] * shortfall)
+
+        return columns[:target_columns]
+    
+    def _identify_content_columns(self, expanded_headers: List[List[str]], 
+                                 expanded_data_rows: List[List[str]]) -> List[int]:
+        """
+        Return the indexes of columns that hold text in at least one row.
+
+        Header rows and data rows are treated alike: a column counts as
+        content when any row has a non-blank entry at that index.
+        """
+        all_rows = [*expanded_headers, *expanded_data_rows]
+        if not all_rows:
+            return []
+
+        column_count = max(len(row) for row in all_rows)
+
+        return [
+            col
+            for col in range(column_count)
+            if any(col < len(row) and row[col].strip() for row in all_rows)
+        ]
+    
+    def _combine_multi_row_headers(self, header_rows: List[List[str]]) -> List[str]:
+        """
+        Collapse several header rows into one label per column.
+
+        For each column the non-blank values of all header rows are
+        collected and one of them is chosen: the first date-like value if
+        there is one, otherwise the first value that is not "year ended" /
+        "years ended", otherwise the first value. Line breaks in the chosen
+        label are replaced by spaces. The column count is taken from the
+        first header row.
+        """
+        if not header_rows:
+            return []
+
+        generic_labels = ['year ended', 'years ended']
+        combined = []
+
+        for col in range(len(header_rows[0])):
+            candidates = [
+                row[col].strip()
+                for row in header_rows
+                if col < len(row) and row[col].strip()
+            ]
+            if not candidates:
+                combined.append("")
+                continue
+
+            dates = [value for value in candidates if self._looks_like_date(value)]
+            specific = [value for value in candidates if value.lower() not in generic_labels]
+
+            # Preference order: date, then specific label, then anything
+            chosen = (dates or specific or candidates)[0]
+            combined.append(chosen.replace('\n', ' '))
+
+        return combined
+    
+    def _looks_like_date(self, text: str) -> bool:
+        """
+        Tell whether text contains something shaped like a date.
+
+        Recognised forms: "Month D, YYYY", "M/D/YYYY", "YYYY-MM-DD", and a
+        text consisting of a four-digit year only. Matching ignores case
+        and treats line breaks as spaces.
+        """
+        import re
+
+        date_patterns = (
+            r'\b(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s*\d{4}',
+            r'\d{1,2}/\d{1,2}/\d{4}',
+            r'\d{4}-\d{2}-\d{2}',
+            r'^\d{4}$',  # Just a year
+        )
+
+        candidate = text.replace('\n', ' ').strip()
+        return any(
+            re.search(pattern, candidate, re.IGNORECASE)
+            for pattern in date_patterns
+        )
--- a/venv/lib/python3.10/site-packages/edgar/documents/renderers/text.py
+++ b/venv/lib/python3.10/site-packages/edgar/documents/renderers/text.py
@@ -0,0 +1,51 @@
+"""
+Plain text renderer for parsed documents.
+"""
+
+from typing import Optional
+from edgar.documents.document import Document
+from edgar.documents.extractors.text_extractor import TextExtractor
+
+
+class TextRenderer:
+    """
+    Renders parsed documents to plain text.
+
+    Thin wrapper that delegates to a TextExtractor, so that plain text
+    output is available through the same render() call as the other
+    renderers.
+    """
+
+    def __init__(self,
+                 clean: bool = True,
+                 include_tables: bool = True,
+                 max_length: Optional[int] = None,
+                 preserve_structure: bool = False):
+        """
+        Configure the underlying text extractor.
+
+        Args:
+            clean: Clean and normalize text
+            include_tables: Include table content
+            max_length: Maximum text length
+            preserve_structure: Preserve document structure
+        """
+        extractor_options = {
+            'clean': clean,
+            'include_tables': include_tables,
+            # Metadata and links are always switched off for this renderer
+            'include_metadata': False,
+            'include_links': False,
+            'max_length': max_length,
+            'preserve_structure': preserve_structure,
+        }
+        self.extractor = TextExtractor(**extractor_options)
+
+    def render(self, document: Document) -> str:
+        """
+        Render document to plain text.
+
+        Args:
+            document: Document to render
+
+        Returns:
+            Text produced by the extractor for this document
+        """
+        plain_text = self.extractor.extract(document)
+        return plain_text