Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
"""13F filing parsers for different document formats."""
from .primary_xml import parse_primary_document_xml
from .infotable_xml import parse_infotable_xml
from .infotable_txt import parse_infotable_txt
__all__ = [
'parse_primary_document_xml',
'parse_infotable_xml',
'parse_infotable_txt',
]

View File

@@ -0,0 +1,119 @@
"""TXT format information table parsers with automatic format detection.
Supports two TXT formats from 2012 filings:
- Format 1 (Multiline): Company names can span multiple lines
- Format 2 (Columnar): All data on single line with <S> and <C> tags
"""
import re
import pandas as pd
from .format_multiline import parse_multiline_format
from .format_columnar import parse_columnar_format
__all__ = ['parse_infotable_txt']
def parse_infotable_txt(infotable_txt: str) -> pd.DataFrame:
    """
    Parse a TXT-format information table, choosing the right parser automatically.

    Two 2012-era layouts are supported:
    - Multiline (Berkshire-style): company names may wrap across lines
    - Columnar (JANA-style): every field of a holding sits on a single line

    Args:
        infotable_txt: TXT content containing the information table

    Returns:
        pd.DataFrame: Holdings data with the same structure as the XML parser
    """
    parser = (
        parse_columnar_format
        if _is_columnar_format(infotable_txt)
        else parse_multiline_format
    )
    return parser(infotable_txt)
def _is_columnar_format(infotable_txt: str) -> bool:
    """
    Decide whether a TXT information table uses the columnar layout.

    Columnar tables put an <S> tag at the start of every data row, while
    multiline tables only carry <S>/<C> tags in the header row.  The check
    samples up to three CUSIP-bearing rows from the holdings table and
    reports columnar when at least half of them also carry an <S> tag.

    Args:
        infotable_txt: TXT content to analyze

    Returns:
        bool: True for columnar format, False for multiline (or no table)
    """
    # The document must contain the information-table heading at all.
    if re.search(r'FORM\s+13F\s+INFORMATION\s+TABLE', infotable_txt, re.IGNORECASE) is None:
        return False
    # Pull out every <TABLE>...</TABLE> section.  The tag may appear before
    # the heading text, so the whole document is scanned.
    found_tables = re.findall(r'<TABLE>(.*?)</TABLE>', infotable_txt, re.DOTALL | re.IGNORECASE)
    if not found_tables:
        return False
    # With two or more tables the first one lists managers, so the second is
    # the first holdings table; otherwise inspect the only table present.
    target_table = found_tables[1] if len(found_tables) >= 2 else found_tables[0]
    tagged_rows = 0   # CUSIP rows that also carry an <S> tag
    sampled_rows = 0  # all CUSIP rows examined so far
    for raw_row in target_table.split('\n'):
        row = raw_row.strip()
        row_upper = row.upper()
        # Ignore blanks and <CAPTION> lines.
        if not row or '<CAPTION>' in row_upper:
            continue
        # A row with an <S> tag but no digit anywhere is a header row.
        if '<S>' in row_upper and re.search(r'\d', row) is None:
            continue
        # Does this row carry a CUSIP?  First look at the first contiguous
        # 9-character alphanumeric token (must contain a digit) ...
        first_hit = re.search(r'\b([A-Za-z0-9]{9})\b', row)
        has_cusip = bool(first_hit and any(ch.isdigit() for ch in first_hit.group(1)))
        # ... then fall back to spaced CUSIPs such as "00724F 10 1".
        if not has_cusip:
            for spaced_hit in re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', row):
                compact = spaced_hit.group(1).replace(' ', '')
                if len(compact) == 9 and any(ch.isdigit() for ch in compact):
                    has_cusip = True
                    break
        if has_cusip:
            sampled_rows += 1
            if '<S>' in row_upper:
                tagged_rows += 1
        # Three sampled data rows are enough evidence either way.
        if sampled_rows >= 3:
            break
    # Columnar when at least half the sampled CUSIP rows carry an <S> tag.
    return sampled_rows > 0 and tagged_rows >= sampled_rows * 0.5

View File

@@ -0,0 +1,286 @@
"""Parser for columnar TXT format (Format 2) used in some 2012 filings.
This format has <S> and <C> tags for each field, with all data on a single line.
Example:
<S> <C> <C> <C> <C>
AETNA INC NEW COM 00817Y108 92,760 2,342,435 SH SOLE 2,238,895 103,540 0
"""
import re
import pandas as pd
from edgar.reference import cusip_ticker_mapping
__all__ = ['parse_columnar_format']
def _find_cusip(line):
    """Locate the first valid CUSIP in *line*.

    A CUSIP is exactly 9 alphanumeric characters and must contain at least
    one digit (this rejects 9-letter words such as "SPONSORED" or
    "WISCONSIN").  Some 2012 filings insert spaces inside the CUSIP
    ("00724F 10 1" -> "00724F101"), so a spaced fallback pattern is tried
    when no contiguous match is found.

    Args:
        line: A single line of table text.

    Returns:
        (cleaned_cusip, text_as_matched) or None when the line contains no
        valid CUSIP.  text_as_matched is the raw substring (possibly spaced).
    """
    # Fast path: contiguous 9-character token.  Scan every candidate, not
    # just the first, so an all-letter word earlier in the line (e.g. an
    # issuer name like "WISCONSIN") does not hide a real CUSIP after it.
    for candidate in re.finditer(r'\b([A-Za-z0-9]{9})\b', line):
        token = candidate.group(1)
        if any(c.isdigit() for c in token):
            return token, token
    # Fallback: sequences of 9-15 chars that may contain embedded spaces.
    for candidate in re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', line):
        cleaned = candidate.group(1).replace(' ', '')
        if len(cleaned) == 9 and any(c.isdigit() for c in cleaned):
            return cleaned, candidate.group(1)
    return None


def parse_columnar_format(infotable_txt: str) -> pd.DataFrame:
    """
    Parse columnar TXT format (Format 2) information table.

    Handles the layout where every field of a holding sits on a single line,
    with <S>/<C> tags marking column boundaries.

    Args:
        infotable_txt: TXT content containing the information table

    Returns:
        pd.DataFrame: Holdings data with the same structure as the XML
        parser; empty DataFrame when no information table or holdings exist.
    """
    # The document must contain the information-table heading (case-insensitive).
    if not re.search(r'FORM\s+13F\s+INFORMATION\s+TABLE', infotable_txt, re.IGNORECASE):
        return pd.DataFrame()
    # Extract all content between <TABLE> and </TABLE> tags.  Scan the whole
    # text since the <TABLE> tag may come before the heading.
    tables = re.findall(r'<TABLE>(.*?)</TABLE>', infotable_txt, re.DOTALL | re.IGNORECASE)
    if not tables:
        return pd.DataFrame()
    # With 2+ tables the first is usually the managers list - skip it.
    # With a single table, only proceed when it actually has holdings rows
    # (lines carrying both an <S> tag and a valid CUSIP).
    if len(tables) >= 2:
        holdings_tables = tables[1:]
    else:
        tagged_lines = [ln for ln in tables[0].split('\n') if '<S>' in ln.upper()]
        if not any(_find_cusip(ln) for ln in tagged_lines[:10]):
            return pd.DataFrame()
        holdings_tables = tables
    parsed_rows = []
    for holdings_table in holdings_tables:
        # Skip the totals table (very short, < 200 chars).
        if len(holdings_table.strip()) < 200:
            continue
        for raw_line in holdings_table.split('\n'):
            line = raw_line.strip()
            line_upper = line.upper()
            # Skip blank lines, <CAPTION> lines and title/total rows.
            if not line or '<CAPTION>' in line_upper:
                continue
            if line.startswith(('Total', 'Title', 'NAME OF ISSUER', 'of', 'Market Value')):
                continue
            # Data rows carry an <S> tag and a CUSIP; header rows carry the
            # tags but no CUSIP, so _find_cusip filters them out as well.
            if '<S>' not in line_upper:
                continue
            found = _find_cusip(line)
            if found is None:
                continue
            cusip, raw_cusip = found
            # Strip the SGML tags (replaced with spaces so adjacent fields
            # still split cleanly) and tokenize.
            parts = line.replace('<S>', ' ').replace('<C>', ' ').split()
            # Minimum token count: issuer, class, cusip, value, shares,
            # type, discretion, sole, shared, none.
            if len(parts) < 10:
                continue
            try:
                # Locate the CUSIP inside the token list.  It may occupy one
                # token (contiguous or raw form), or several tokens for
                # spaced CUSIPs (e.g. ["00724F", "10", "1"]).
                cusip_idx = None
                cusip_span = 1  # number of tokens the CUSIP occupies
                if cusip in parts:
                    cusip_idx = parts.index(cusip)
                elif raw_cusip in parts:
                    cusip_idx = parts.index(raw_cusip)
                else:
                    for i in range(len(parts) - 2):
                        # Try joining 2-4 consecutive tokens.
                        for span in range(2, 5):
                            if i + span > len(parts):
                                break
                            if ''.join(parts[i:i + span]) == cusip:
                                cusip_idx = i
                                cusip_span = span
                                break
                        if cusip_idx is not None:
                            break
                if cusip_idx is None:
                    continue
                # Tokens before the CUSIP: issuer name, with the share class
                # as the last token.
                before_cusip = parts[:cusip_idx]
                if len(before_cusip) < 2:
                    continue
                title_class = before_cusip[-1]
                issuer_name = ' '.join(before_cusip[:-1])
                # Tokens after the CUSIP:
                # VALUE SHARES TYPE DISCRETION ... SOLE SHARED NONE
                after_cusip = parts[cusip_idx + cusip_span:]
                if len(after_cusip) < 7:
                    continue
                value_str = after_cusip[0].replace(',', '').replace('$', '')
                shares_str = after_cusip[1].replace(',', '')
                value = int(value_str) if value_str and value_str != '-' else 0
                shares = int(shares_str) if shares_str and shares_str != '-' else 0
                # Shares-or-principal marker (SH/PRN); anything else maps to
                # "Shares" as a conservative default.
                share_type = after_cusip[2] if len(after_cusip) > 2 else 'SH'
                share_type_full = {'SH': 'Shares', 'PRN': 'Principal'}.get(share_type, 'Shares')
                # Investment discretion is the first non-numeric field after
                # the type marker (e.g. "SOLE", "SHARED-DEFINED").
                investment_discretion = ''
                for i in range(3, len(after_cusip) - 3):  # last 3 are voting columns
                    field = after_cusip[i]
                    if field and field not in ['-'] and not field.replace(',', '').isdigit():
                        investment_discretion = field
                        break
                # Voting authority: the last three fields (sole, shared, none).
                none_voting_str = after_cusip[-1].replace(',', '')
                shared_voting_str = after_cusip[-2].replace(',', '')
                sole_voting_str = after_cusip[-3].replace(',', '')
                non_voting = int(none_voting_str) if none_voting_str and none_voting_str != '-' else 0
                shared_voting = int(shared_voting_str) if shared_voting_str and shared_voting_str != '-' else 0
                sole_voting = int(sole_voting_str) if sole_voting_str and sole_voting_str != '-' else 0
                parsed_rows.append({
                    'Issuer': issuer_name,
                    'Class': title_class,
                    'Cusip': cusip,
                    'Value': value,
                    'SharesPrnAmount': shares,
                    'Type': share_type_full,
                    'PutCall': '',
                    'InvestmentDiscretion': investment_discretion,
                    'SoleVoting': sole_voting,
                    'SharedVoting': shared_voting,
                    'NonVoting': non_voting
                })
            except (ValueError, IndexError):
                # Skip rows that don't parse cleanly.
                continue
    if not parsed_rows:
        return pd.DataFrame()
    table = pd.DataFrame(parsed_rows)
    # Add ticker symbols via the CUSIP -> ticker mapping.
    cusip_mapping = cusip_ticker_mapping(allow_duplicate_cusips=False)
    table['Ticker'] = table.Cusip.map(cusip_mapping.Ticker)
    return table

View File

@@ -0,0 +1,273 @@
"""Parser for multiline TXT format (Format 1) used in some 2012 filings.
This format has company names that can span multiple lines, with the CUSIP
appearing on the same line as the continuation of the company name.
Example:
AMERICAN
EXPRESS CO COM 025816109 110999 1952142 Shared-Defined...
"""
import re
import pandas as pd
from edgar.reference import cusip_ticker_mapping
__all__ = ['parse_multiline_format']
def parse_multiline_format(infotable_txt: str) -> pd.DataFrame:
    """
    Parse multiline TXT format (Format 1) information table.

    Handles the layout where a company name can span multiple lines: the
    first fragment of the name sits alone on one line and the rest of the
    row (name continuation, class, CUSIP, numeric data) follows on the next
    line.  A small amount of state (``pending_issuer_parts``) carries the
    dangling name fragment forward to the CUSIP-bearing line.

    Args:
        infotable_txt: TXT content containing the information table

    Returns:
        pd.DataFrame: Holdings data with the same structure as the XML
        parser; empty DataFrame when no information table or holdings exist.
    """
    # The document must contain the information-table heading (case-insensitive).
    match = re.search(r'FORM\s+13F\s+INFORMATION\s+TABLE', infotable_txt, re.IGNORECASE)
    if not match:
        return pd.DataFrame()
    # Extract all content between <TABLE> and </TABLE> tags (case-insensitive).
    # Search the whole text since the <TABLE> tag may come before the heading.
    table_pattern = r'<TABLE>(.*?)</TABLE>'
    tables = re.findall(table_pattern, infotable_txt, re.DOTALL | re.IGNORECASE)
    if len(tables) == 0:
        return pd.DataFrame()
    # Determine which tables to process:
    # - 2+ tables: skip the first (usually the managers list), process the rest
    # - 1 table: only process it if it actually contains CUSIP-like tokens
    if len(tables) >= 2:
        holdings_tables = tables[1:]
    elif len(tables) == 1:
        # Look for 9-char alphanumeric sequences (with or without embedded
        # spaces) that contain at least one digit - i.e. plausible CUSIPs.
        potential_cusips = re.findall(r'\b([A-Za-z0-9]{9})\b', tables[0])
        spaced_cusips = re.findall(r'\b([A-Za-z0-9 ]{9,15})\b', tables[0])
        spaced_cusips_cleaned = [c.replace(' ', '') for c in spaced_cusips if len(c.replace(' ', '')) == 9]
        has_valid_cusips = (
            any(any(c.isdigit() for c in cusip) for cusip in potential_cusips) or
            any(any(c.isdigit() for c in cusip) for cusip in spaced_cusips_cleaned)
        )
        if has_valid_cusips:
            holdings_tables = tables
        else:
            return pd.DataFrame()  # no holdings data in the single table
    else:
        return pd.DataFrame()
    parsed_rows = []
    for holdings_table in holdings_tables:
        # Skip the totals table (very short, < 200 chars).
        if len(holdings_table.strip()) < 200:
            continue
        # Reset the dangling-name state for each table.
        pending_issuer_parts = []
        lines = holdings_table.split('\n')
        for line in lines:
            orig_line = line  # kept for debugging; not otherwise used
            line = line.strip()
            # Skip blanks, <CAPTION> lines, and tag-only header rows:
            # in this format <S>/<C> tags only appear in the header.
            line_upper = line.upper()
            if not line or '<CAPTION>' in line_upper or '<S>' in line_upper or '<C>' in line_upper:
                continue
            # Skip separator lines made purely of dashes and spaces.
            if all(c in '- ' for c in line):
                continue
            # (Recomputed; identical to the value above.)
            line_upper = line.upper()
            # Skip header/title rows.
            if line.startswith(('Total', 'Title', 'Name of Issuer', 'of', 'Market Value')):
                continue
            # Skip column-header rows by keyword.
            if any(keyword in line_upper for keyword in ['COLUMN 1', 'COLUMN 2', 'VOTING AUTHORITY', 'SHRS OR', 'NAME OF ISSUER', 'FORM 13F', 'INFORMATION TABLE']):
                continue
            # Use the CUSIP as the row anchor: 9 alphanumeric characters,
            # at least one digit (rejects 9-letter words like "SPONSORED").
            # Some filings space the CUSIP out: "00724F 10 1" -> "00724F101".
            cusip_match = None
            cusip = None
            # Fast path: contiguous 9-char tokens (scan all candidates).
            all_cusip_matches = re.finditer(r'\b([A-Za-z0-9]{9})\b', line)
            for match in all_cusip_matches:
                if any(c.isdigit() for c in match.group(1)):
                    cusip_match = match
                    cusip = match.group(1)
                    break
            # Fallback: spaced CUSIPs, cleaned to 9 characters.
            if not cusip_match:
                spaced_matches = re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', line)
                for match in spaced_matches:
                    cleaned = match.group(1).replace(' ', '')
                    if len(cleaned) == 9 and any(c.isdigit() for c in cleaned):
                        cusip_match = match
                        cusip = cleaned  # use the cleaned, space-free version
                        break
            if cusip_match:
                # This line contains a CUSIP, so it holds the main data.
                cusip_pos = cusip_match.start()
                # Text before the CUSIP: issuer name (tail) + share class.
                before_cusip = line[:cusip_pos].strip()
                # Text after the CUSIP: numeric data.  match.end() handles
                # spaced CUSIPs correctly (e.g. "00724F 10 1").
                after_cusip = line[cusip_match.end():].strip()
                before_parts = before_cusip.split()
                # Prepend any name fragment carried over from the previous
                # line - this completes a multi-line company name.
                if pending_issuer_parts:
                    before_parts = pending_issuer_parts + before_parts
                    pending_issuer_parts = []
                if len(before_parts) < 2:
                    # Not enough tokens for issuer + class; skip.
                    continue
                # Split issuer name from share class.  Recognized patterns:
                #   "COMPANY NAME COM"            -> class "COM"
                #   "COMPANY NAME SPONSORED ADR"  -> class "SPONSORED ADR"
                #   "COMPANY NAME CL A"           -> class "CL A"
                #   "... LIB CAP COM A"           -> 4-token Liberty Media class
                if len(before_parts) >= 3 and before_parts[-2] == 'SPONSORED' and before_parts[-1] == 'ADR':
                    title_class = 'SPONSORED ADR'
                    issuer_parts = before_parts[:-2]
                elif len(before_parts) >= 3 and before_parts[-2] == 'CL':
                    title_class = 'CL ' + before_parts[-1]
                    issuer_parts = before_parts[:-2]
                elif len(before_parts) >= 5 and ' '.join(before_parts[-4:]).startswith('LIB CAP COM'):
                    # e.g. "LIBERTY MEDIA CORPORATION LIB CAP COM A"
                    title_class = ' '.join(before_parts[-4:])
                    issuer_parts = before_parts[:-4]
                elif len(before_parts) >= 2:
                    # Default: the last token is the class.
                    title_class = before_parts[-1]
                    issuer_parts = before_parts[:-1]
                else:
                    # Only one token - cannot split; skip this row.
                    continue
                issuer_name = ' '.join(issuer_parts)
                if not issuer_name:
                    continue
                # Numeric data after the CUSIP.  Columns are flexible since
                # empty columns may be omitted entirely:
                #   VALUE SHARES [TYPE] [DISCRETION] [MANAGERS] [SOLE] [SHARED] [NONE]
                data_parts = after_cusip.split()
                if len(data_parts) < 2:  # at minimum need value and shares
                    continue
                try:
                    # Value and shares are always the first two fields.
                    value_str = data_parts[0].replace(',', '').replace('$', '')
                    shares_str = data_parts[1].replace(',', '')
                    value = int(value_str) if value_str and value_str != '-' else 0
                    # NOTE(review): shares parsed as float here (the columnar
                    # parser uses int) - presumably to tolerate decimals;
                    # confirm downstream dtype expectations.
                    shares = float(shares_str) if shares_str and shares_str != '-' else 0
                    # Collect up to 3 trailing numeric fields as the voting
                    # columns, walking backwards from the end of the row and
                    # stopping at the first non-numeric token.
                    voting_values = []
                    for i in range(len(data_parts) - 1, 1, -1):  # skip first 2 (value/shares)
                        part = data_parts[i].replace(',', '').replace('.', '')
                        if part.replace('-', '').isdigit():
                            val_str = data_parts[i].replace(',', '')
                            try:
                                # insert(0, ...) keeps left-to-right order:
                                # [sole, shared, none]
                                voting_values.insert(0, float(val_str) if val_str != '-' else 0)
                                if len(voting_values) == 3:
                                    break
                            except ValueError:
                                break
                        else:
                            # Non-numeric token: no more voting columns.
                            break
                    # Assign whatever voting values were found (0-3 of them).
                    sole_voting = int(voting_values[0]) if len(voting_values) >= 1 else 0
                    shared_voting = int(voting_values[1]) if len(voting_values) >= 2 else 0
                    non_voting = int(voting_values[2]) if len(voting_values) >= 3 else 0
                    # Investment discretion: first non-numeric field after
                    # position 2 (which may be the SH/PRN type marker) and
                    # before the trailing voting columns.  Typically
                    # "Shared-Defined", "SOLE", "Defined", etc.
                    investment_discretion = ''
                    num_voting_at_end = len(voting_values)
                    for i in range(2, len(data_parts) - num_voting_at_end):
                        part = data_parts[i]
                        if part and part not in ['-', 'SH', 'PRN'] and not part.replace(',', '').replace('.', '').isdigit():
                            investment_discretion = part
                            break
                    # Assemble the output row (same schema as the XML parser).
                    row_dict = {
                        'Issuer': issuer_name,
                        'Class': title_class,
                        'Cusip': cusip,
                        'Value': value,
                        'SharesPrnAmount': shares,
                        'Type': 'Shares',
                        'PutCall': '',
                        'InvestmentDiscretion': investment_discretion,
                        'SoleVoting': sole_voting,
                        'SharedVoting': shared_voting,
                        'NonVoting': non_voting
                    }
                    parsed_rows.append(row_dict)
                except (ValueError, IndexError):
                    # Skip rows that don't parse cleanly.
                    continue
            else:
                # No CUSIP on this line - treat it as the first fragment of a
                # multi-line company name and hold it for the next line.
                # (Any previous unconsumed fragment is overwritten.)
                if line and not line.startswith(('Total', 'Title')):
                    pending_issuer_parts = line.split()
    if not parsed_rows:
        return pd.DataFrame()
    table = pd.DataFrame(parsed_rows)
    # Add ticker symbols via the CUSIP -> ticker mapping.
    cusip_mapping = cusip_ticker_mapping(allow_duplicate_cusips=False)
    table['Ticker'] = table.Cusip.map(cusip_mapping.Ticker)
    return table

View File

@@ -0,0 +1,56 @@
"""Parser for 13F information table XML format."""
import pandas as pd
from edgar.reference import cusip_ticker_mapping
from edgar.xmltools import child_text, find_element
__all__ = ['parse_infotable_xml']
def parse_infotable_xml(infotable_xml: str) -> pd.DataFrame:
    """
    Parse the infotable xml and return a pandas DataFrame.

    Args:
        infotable_xml: XML content of the information table

    Returns:
        pd.DataFrame: one row per <infoTable> element with columns
            Issuer, Class, Cusip, Value, SharesPrnAmount, Type, PutCall,
            InvestmentDiscretion, SoleVoting, SharedVoting, NonVoting, Ticker
    """
    root = find_element(infotable_xml, "informationTable")
    rows = []
    # "SH" -> "Shares", "PRN" -> "Principal"; unknown codes map to None.
    shares_or_principal = {"SH": "Shares", "PRN": "Principal"}
    for info_tag in root.find_all("infoTable"):
        info_table = dict()
        info_table['Issuer'] = child_text(info_tag, "nameOfIssuer")
        info_table['Class'] = child_text(info_tag, "titleOfClass")
        info_table['Cusip'] = child_text(info_tag, "cusip")
        # Go through float first so values with a decimal component
        # (e.g. "92760.0") do not raise - consistent with the voting
        # columns below, which already parse via int(float(...)).
        info_table['Value'] = int(float(child_text(info_tag, "value")))
        # Shares or principal amount
        shares_tag = info_tag.find("shrsOrPrnAmt")
        # NOTE(review): kept as the raw string from the XML (not cast to a
        # number) to preserve the existing output dtype for callers.
        info_table['SharesPrnAmount'] = child_text(shares_tag, "sshPrnamt")
        # Shares-or-principal type code
        ssh_prnamt_type = child_text(shares_tag, "sshPrnamtType")
        info_table['Type'] = shares_or_principal.get(ssh_prnamt_type)
        info_table["PutCall"] = child_text(info_tag, "putCall") or ""
        info_table['InvestmentDiscretion'] = child_text(info_tag, "investmentDiscretion")
        # Voting authority: values may carry decimals, so parse via float.
        voting_auth_tag = info_tag.find("votingAuthority")
        info_table['SoleVoting'] = int(float(child_text(voting_auth_tag, "Sole")))
        info_table['SharedVoting'] = int(float(child_text(voting_auth_tag, "Shared")))
        info_table['NonVoting'] = int(float(child_text(voting_auth_tag, "None")))
        rows.append(info_table)
    table = pd.DataFrame(rows)
    # Add the ticker symbol via the CUSIP -> ticker mapping.
    cusip_mapping = cusip_ticker_mapping(allow_duplicate_cusips=False)
    table['Ticker'] = table.Cusip.map(cusip_mapping.Ticker)
    return table

View File

@@ -0,0 +1,118 @@
"""Parser for 13F primary document XML format."""
from datetime import datetime
from decimal import Decimal
from functools import lru_cache
from edgar._party import Address
from edgar.thirteenf.models import (
FilingManager,
OtherManager,
CoverPage,
SummaryPage,
Signature,
PrimaryDocument13F
)
from edgar.xmltools import child_text, find_element
__all__ = ['parse_primary_document_xml']
@lru_cache(maxsize=8)
def parse_primary_document_xml(primary_document_xml: str):
    """
    Parse the primary 13F XML document.

    Results are memoized on the XML string (lru_cache, up to 8 documents)
    since the same document may be parsed repeatedly.

    Args:
        primary_document_xml: XML content of the primary document

    Returns:
        PrimaryDocument13F: Parsed primary document data
    """
    root = find_element(primary_document_xml, "edgarSubmission")
    # Header data: the filer info carries the report period (MM-DD-YYYY).
    header_data = root.find("headerData")
    filer_info = header_data.find("filerInfo")
    report_period = datetime.strptime(child_text(filer_info, "periodOfReport"), "%m-%d-%Y")
    # Form data and cover page
    form_data = root.find("formData")
    cover_page_el = form_data.find("coverPage")
    report_calendar_or_quarter = child_text(form_data, "reportCalendarOrQuarter")
    report_type = child_text(cover_page_el, "reportType")
    # Filing manager and its address
    filing_manager_el = cover_page_el.find("filingManager")
    address_el = filing_manager_el.find("address")
    address = Address(
        street1=child_text(address_el, "street1"),
        street2=child_text(address_el, "street2"),
        city=child_text(address_el, "city"),
        state_or_country=child_text(address_el, "stateOrCountry"),
        zipcode=child_text(address_el, "zipCode")
    )
    filing_manager = FilingManager(name=child_text(filing_manager_el, "name"), address=address)
    # Other managers (optional section).  Compare against None explicitly:
    # XML element objects can evaluate as falsy when they have no children,
    # which would wrongly treat a present element as missing.
    other_manager_info_el = cover_page_el.find("otherManagersInfo")
    other_managers = [
        OtherManager(
            cik=child_text(other_manager_el, "cik"),
            name=child_text(other_manager_el, "name"),
            file_number=child_text(other_manager_el, "form13FFileNumber")
        )
        for other_manager_el in other_manager_info_el.find_all("otherManager")
    ] if other_manager_info_el is not None else []
    # Summary page (optional); default all counts to zero when absent.
    summary_page_el = form_data.find("summaryPage")
    if summary_page_el is not None:
        other_included_managers_count = child_text(summary_page_el,
                                                   "otherIncludedManagersCount")
        if other_included_managers_count:
            other_included_managers_count = int(other_included_managers_count)
        total_holdings = child_text(summary_page_el, "tableEntryTotal")
        if total_holdings:
            total_holdings = int(total_holdings)
        total_value = child_text(summary_page_el, "tableValueTotal")
        if total_value:
            total_value = Decimal(total_value)
    else:
        other_included_managers_count = 0
        total_holdings = 0
        total_value = 0
    # Signature block
    signature_block_el = form_data.find("signatureBlock")
    signature = Signature(
        name=child_text(signature_block_el, "name"),
        title=child_text(signature_block_el, "title"),
        phone=child_text(signature_block_el, "phone"),
        city=child_text(signature_block_el, "city"),
        signature=child_text(signature_block_el, "signature"),
        state_or_country=child_text(signature_block_el, "stateOrCountry"),
        date=child_text(signature_block_el, "signatureDate")
    )
    # Assemble the parsed document.  The `or 0` fallbacks cover summary-page
    # fields that were present but empty (child_text returned None/"").
    parsed_primary_doc = PrimaryDocument13F(
        report_period=report_period,
        cover_page=CoverPage(
            filing_manager=filing_manager,
            report_calendar_or_quarter=report_calendar_or_quarter,
            report_type=report_type,
            other_managers=other_managers
        ),
        signature=signature,
        summary_page=SummaryPage(
            other_included_managers_count=other_included_managers_count or 0,
            total_holdings=total_holdings or 0,
            total_value=total_value or 0
        ),
        additional_information=child_text(cover_page_el, "additionalInformation")
    )
    return parsed_primary_doc