""" EdgarTools HTML Parser v2.0 A high-performance, semantically-aware HTML parser for SEC filings. """ from edgar.documents.parser import HTMLParser from edgar.documents.document import Document from edgar.documents.config import ParserConfig from edgar.documents.exceptions import ParsingError from edgar.documents.types import NodeType, SemanticType, TableType from edgar.documents.search import DocumentSearch, SearchResult, SearchMode from edgar.documents.renderers import MarkdownRenderer, TextRenderer __version__ = "2.0.0" __all__ = [ 'HTMLParser', 'Document', 'ParserConfig', 'ParsingError', 'NodeType', 'SemanticType', 'TableType', 'DocumentSearch', 'SearchResult', 'SearchMode', 'MarkdownRenderer', 'TextRenderer', 'parse_html' ] def parse_html(html: str, config: ParserConfig = None) -> Document: """ Convenience function for parsing HTML. Args: html: HTML content to parse config: Optional parser configuration Returns: Parsed Document object Example: >>> document = parse_html(html_content) >>> print(document.text()[:100]) """ parser = HTMLParser(config or ParserConfig()) return parser.parse(html)