Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,746 @@
import io
import logging
from pathlib import Path
from typing import Any, Dict, Generator, Optional
from lxml import etree as ET
from .data import (
ClassInfo,
IncludedManager,
PrimaryDoc,
ProxyTable,
ProxyVoteTable,
ReportSeriesClassInfo,
SeriesReport,
VoteCategory,
VoteRecord,
)
log = logging.getLogger(__name__)
class BaseExtractor:
    """Base class for XML extractors.

    Parses raw XML bytes with a recovering lxml parser at construction time
    and provides shared, None-safe text/float lookup helpers for subclasses.
    """

    def __init__(self, xml_bytes: bytes):
        """Initialize the extractor with raw XML bytes and parse the root element.

        Args:
            xml_bytes: Raw XML document content.

        Raises:
            ValueError: If the XML cannot be parsed even with recovery enabled.
        """
        self.xml_bytes = xml_bytes
        try:
            # Use a recovering parser for robustness against minor XML issues.
            parser = ET.XMLParser(recover=True)
            self.root: Optional[ET._Element] = ET.fromstring(xml_bytes, parser=parser)
        except ET.XMLSyntaxError as e:
            # lxml raises XMLSyntaxError (ElementTree's ParseError is not part
            # of the lxml API), so catch the lxml exception and normalize it to
            # the ValueError contract callers rely on.
            raise ValueError(f"Error parsing XML bytes during initial check: {e}") from e
        if self.root is None:
            # With recover=True, severely malformed input can yield a None root
            # instead of raising; surface that as the same ValueError contract.
            raise ValueError(
                "Error parsing XML bytes during initial check: "
                "Failed to parse XML: root element is None even after recovery."
            )

    @classmethod
    def from_file(cls, xml_file_path: Path) -> "BaseExtractor":
        """Factory method to create an extractor instance from an XML file path.

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        if not xml_file_path.exists():
            raise FileNotFoundError(f"XML file not found: {xml_file_path}")
        xml_bytes = xml_file_path.read_bytes()
        return cls(xml_bytes)

    def extract(self) -> Any:
        """Main extraction method to be implemented by subclasses."""
        raise NotImplementedError("Subclasses must implement the extract method.")

    def _get_optional_text(
        self,
        element: Optional[ET._Element],
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> Optional[str]:
        """Safely get stripped text from a child element.

        Returns None when *element* is None, the xpath matches nothing, or the
        matched element has no/empty text.
        """
        if element is None:
            return None
        # find() requires a mapping; substitute an empty dict when no
        # namespaces were supplied.
        ns = namespaces if namespaces is not None else {}
        found_element = element.find(xpath, namespaces=ns)
        if found_element is not None and found_element.text:
            return found_element.text.strip()
        return None

    def _get_required_text(
        self,
        element: ET._Element,
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> str:
        """Get text from an element, raising ValueError if not found or empty."""
        text = self._get_optional_text(element, xpath, namespaces)
        if text is None:
            # Include the offending element's XML in the error for diagnosis.
            element_str = ET.tostring(element, pretty_print=True).decode()
            raise ValueError(
                f"Required text not found for xpath: {xpath} in element: {element_str}"
            )
        return text

    def _get_optional_float(
        self,
        element: Optional[ET._Element],
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> Optional[float]:
        """Safely get a float from an element.

        Returns None if the text is missing or not parseable as a float.
        """
        text = self._get_optional_text(element, xpath, namespaces)
        if text is None:
            return None
        try:
            return float(text)
        except ValueError:
            # Unparseable numeric text is treated the same as absent.
            return None

    def _get_required_float(
        self,
        element: ET._Element,
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> float:
        """Get a float from an element, raising ValueError if missing/invalid."""
        val = self._get_optional_float(element, xpath, namespaces)
        if val is None:
            element_str = ET.tostring(element, pretty_print=True).decode()
            raise ValueError(
                f"Required float not found or invalid for xpath: {xpath} in element: {element_str}"
            )
        return val
# Namespaces for different filing formats
# Prefix map used with lxml find()/findall() throughout the primary_doc
# extractor: "npx" is the N-PX form schema, "com" holds shared EDGAR common
# types (address fields etc.).
PRIMARY_DOC_NAMESPACES: Dict[str, str] = {
    "npx": "http://www.sec.gov/edgar/npx",
    "com": "http://www.sec.gov/edgar/common",
}
class PrimaryDocExtractor(BaseExtractor):
    """
    Extracts data from a primary_doc.xml string into a PrimaryDoc dataclass.
    Handles different XML formats including standard N-PX and amendment filings.
    """

    def __init__(self, xml_bytes: bytes):
        """Initialize the extractor with raw XML *bytes*."""
        super().__init__(xml_bytes)
        # Prefix for all namespaced lookups below; resolved through
        # PRIMARY_DOC_NAMESPACES.
        self.doc_prefix = "npx"

    def _get_submission_type(self) -> Optional[str]:
        """Extract the submission_type from the XML to determine filing type."""
        npx_prefix = "npx"
        path = f"{npx_prefix}:headerData/{npx_prefix}:submissionType"
        return self._get_optional_text(self.root, path, PRIMARY_DOC_NAMESPACES)

    def extract(self) -> PrimaryDoc:
        """
        Parses the XML and populates the PrimaryDoc dataclass.
        Handles both standard N-PX filings and amendment filings with different structures.

        Returns:
            PrimaryDoc: Fully populated filing record; optional sections that
            are absent in the XML yield None (or empty lists) in the result.

        Raises:
            ValueError: If the root was not parsed or a structurally required
            element (<headerData>, <formData>, <coverPage>, <reportingPerson>,
            its <address>, or <signaturePage>) or required text is missing.
        """
        if self.root is None:
            raise ValueError("XML root not parsed. Cannot extract.")
        prefix = self.doc_prefix
        submission_type = self._get_submission_type()
        # Amendment filings carry a "/A" suffix in the submission type
        # (e.g. "N-PX/A"); presumably per EDGAR convention — TODO confirm.
        is_amendment_submission = "/A" in submission_type if submission_type else False

        # --- Locate major structural sections; required ones raise if absent.
        header_data = self.root.find(f"{prefix}:headerData", PRIMARY_DOC_NAMESPACES)
        if header_data is None:
            raise ValueError("Required <headerData> element not found in XML.")
        form_data = self.root.find(f"{prefix}:formData", PRIMARY_DOC_NAMESPACES)
        if form_data is None:
            raise ValueError("Required <formData> element not found in XML.")
        cover_page = form_data.find(f"{prefix}:coverPage", PRIMARY_DOC_NAMESPACES)
        if cover_page is None:
            raise ValueError(
                f"Required <coverPage> element not found in XML using prefix {prefix}."
            )
        reporting_person = cover_page.find(
            f"{prefix}:reportingPerson", PRIMARY_DOC_NAMESPACES
        )
        if reporting_person is None:
            raise ValueError(
                "Required <reportingPerson> element not found in <coverPage>."
            )
        reporting_person_address = reporting_person.find(
            f"{prefix}:address", PRIMARY_DOC_NAMESPACES
        )
        if reporting_person_address is None:
            raise ValueError(
                "Required <address> element not found in <reportingPerson>."
            )
        # Agent-for-service section is optional; its address is only looked up
        # when the section itself exists.
        agent_for_service = cover_page.find(
            f"{prefix}:agentForService", PRIMARY_DOC_NAMESPACES
        )
        agent_for_service_address = (
            agent_for_service.find(f"{prefix}:address", PRIMARY_DOC_NAMESPACES)
            if agent_for_service is not None
            else None
        )
        signature_page = form_data.find(
            f"{prefix}:signaturePage", PRIMARY_DOC_NAMESPACES
        )
        if signature_page is None:
            raise ValueError(
                f"Required <signaturePage> element not found in XML using prefix {prefix}."
            )
        # Remaining sections are all optional and handled with None guards.
        summary_page = form_data.find(f"{prefix}:summaryPage", PRIMARY_DOC_NAMESPACES)
        series_page = form_data.find(f"{prefix}:seriesPage", PRIMARY_DOC_NAMESPACES)
        report_info = cover_page.find(f"{prefix}:reportInfo", PRIMARY_DOC_NAMESPACES)
        explanatory_info = cover_page.find(
            f"{prefix}:explanatoryInformation", PRIMARY_DOC_NAMESPACES
        )
        amendment_info = cover_page.find(
            f"{prefix}:amendmentInfo", PRIMARY_DOC_NAMESPACES
        )
        contact = header_data.find(f"{prefix}:contact", PRIMARY_DOC_NAMESPACES)

        # --- Other included managers (summaryPage/otherManagers2), if any.
        included_managers = []
        if summary_page is not None:
            other_managers_section = summary_page.find(
                f"{prefix}:otherManagers2", PRIMARY_DOC_NAMESPACES
            )
            if other_managers_section is not None:
                # Each <investmentManagers> child describes one additional
                # manager included in this filing.
                for manager_elem in other_managers_section.findall(
                    f"{prefix}:investmentManagers", PRIMARY_DOC_NAMESPACES
                ):
                    manager = IncludedManager(
                        # serialNo and name are treated as required; file
                        # numbers are optional per the lookups below.
                        serial_no=self._get_required_text(
                            manager_elem, f"{prefix}:serialNo", PRIMARY_DOC_NAMESPACES
                        ),
                        form13f_file_number=self._get_optional_text(
                            manager_elem,
                            f"{prefix}:form13FFileNumber",
                            PRIMARY_DOC_NAMESPACES,
                        ),
                        name=self._get_required_text(
                            manager_elem, f"{prefix}:name", PRIMARY_DOC_NAMESPACES
                        ),
                        sec_file_number=self._get_optional_text(
                            manager_elem,
                            f"{prefix}:secFileNumber",
                            PRIMARY_DOC_NAMESPACES,
                        ),
                    )
                    included_managers.append(manager)

        # --- Reported series/class identifiers (headerData/seriesClass).
        report_series_class_infos = []
        series_class_section = header_data.find(
            f"{prefix}:seriesClass", PRIMARY_DOC_NAMESPACES
        )
        if series_class_section is not None:
            report_series_class = series_class_section.find(
                f"{prefix}:reportSeriesClass", PRIMARY_DOC_NAMESPACES
            )
            if report_series_class is not None:
                for rpt_series_class_info_elem in report_series_class.findall(
                    f"{prefix}:rptSeriesClassInfo", PRIMARY_DOC_NAMESPACES
                ):
                    series_id = self._get_required_text(
                        rpt_series_class_info_elem,
                        f"{prefix}:seriesId",
                        PRIMARY_DOC_NAMESPACES,
                    )
                    # Each series may list multiple share classes.
                    class_infos = []
                    for class_info_elem in rpt_series_class_info_elem.findall(
                        f"{prefix}:classInfo", PRIMARY_DOC_NAMESPACES
                    ):
                        class_id = self._get_required_text(
                            class_info_elem, f"{prefix}:classId", PRIMARY_DOC_NAMESPACES
                        )
                        class_infos.append(ClassInfo(class_id=class_id))
                    report_series_class_infos.append(
                        ReportSeriesClassInfo(
                            series_id=series_id, class_infos=class_infos
                        )
                    )

        # --- Per-series reports (seriesPage/seriesDetails), if present.
        series_reports = []
        if series_page is not None:
            series_details = series_page.find(
                f"{prefix}:seriesDetails", PRIMARY_DOC_NAMESPACES
            )
            if series_details is not None:
                for series_report_elem in series_details.findall(
                    f"{prefix}:seriesReports", PRIMARY_DOC_NAMESPACES
                ):
                    series_reports.append(
                        SeriesReport(
                            id_of_series=self._get_required_text(
                                series_report_elem,
                                f"{prefix}:idOfSeries",
                                PRIMARY_DOC_NAMESPACES,
                            ),
                            name_of_series=self._get_optional_text(
                                series_report_elem,
                                f"{prefix}:nameOfSeries",
                                PRIMARY_DOC_NAMESPACES,
                            ),
                            lei_of_series=self._get_optional_text(
                                series_report_elem,
                                f"{prefix}:leiOfSeries",
                                PRIMARY_DOC_NAMESPACES,
                            ),
                        )
                    )

        # --- Assemble the result. Several fields chain `or` lookups to cover
        # alternative tag names seen across filing format variants.
        return PrimaryDoc(
            cik=self._get_required_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:filer/{prefix}:issuerCredentials/{prefix}:cik",
                PRIMARY_DOC_NAMESPACES,
            ),
            submission_type=submission_type or "",
            period_of_report=self._get_required_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:periodOfReport",
                PRIMARY_DOC_NAMESPACES,
            ),
            fund_name=self._get_required_text(
                reporting_person, f"{prefix}:name", PRIMARY_DOC_NAMESPACES
            ),
            phone_number=self._get_optional_text(
                reporting_person, f"{prefix}:phoneNumber", PRIMARY_DOC_NAMESPACES
            ),
            # Reporting person address uses the "com" (EDGAR common) namespace.
            street1=self._get_required_text(
                reporting_person_address, "com:street1", PRIMARY_DOC_NAMESPACES
            ),
            street2=self._get_optional_text(
                reporting_person_address, "com:street2", PRIMARY_DOC_NAMESPACES
            ),
            city=self._get_required_text(
                reporting_person_address, "com:city", PRIMARY_DOC_NAMESPACES
            ),
            state=self._get_required_text(
                reporting_person_address, "com:stateOrCountry", PRIMARY_DOC_NAMESPACES
            ),
            zip_code=self._get_required_text(
                reporting_person_address, "com:zipCode", PRIMARY_DOC_NAMESPACES
            ),
            # Fallback: <reportingCrdNumber> first, then legacy <crdNumber>.
            crd_number=self._get_optional_text(
                cover_page, f"{prefix}:reportingCrdNumber", PRIMARY_DOC_NAMESPACES
            )
            or self._get_optional_text(
                cover_page, f"{prefix}:crdNumber", PRIMARY_DOC_NAMESPACES
            ),
            # Fallback chain across cover page variants, then the header's
            # filer fileNumber.
            filer_sec_file_number=self._get_optional_text(
                cover_page, f"{prefix}:reportingSecFileNumber", PRIMARY_DOC_NAMESPACES
            )
            or self._get_optional_text(
                cover_page, f"{prefix}:filerSecFileNumber", PRIMARY_DOC_NAMESPACES
            )
            or self._get_optional_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:filer/{prefix}:fileNumber",
                PRIMARY_DOC_NAMESPACES,
            ),
            # Fallback: <lei> first, then <leiNumber>.
            lei_number=self._get_optional_text(
                cover_page, f"{prefix}:lei", PRIMARY_DOC_NAMESPACES
            )
            or self._get_optional_text(
                cover_page, f"{prefix}:leiNumber", PRIMARY_DOC_NAMESPACES
            ),
            report_calendar_year=self._get_required_text(
                cover_page, f"{prefix}:reportCalendarYear", PRIMARY_DOC_NAMESPACES
            ),
            # reportType is required only when <reportInfo> exists at all.
            report_type=(
                self._get_required_text(
                    report_info, f"{prefix}:reportType", PRIMARY_DOC_NAMESPACES
                )
                if report_info is not None
                else None
            ),
            confidential_treatment=(
                self._get_optional_text(
                    report_info,
                    f"{prefix}:confidentialTreatment",
                    PRIMARY_DOC_NAMESPACES,
                )
                if report_info is not None
                else None
            ),
            notice_explanation=(
                self._get_optional_text(
                    explanatory_info,
                    f"{prefix}:noticeExplanation",
                    PRIMARY_DOC_NAMESPACES,
                )
                if explanatory_info is not None
                else None
            ),
            npx_file_number=self._get_optional_text(
                cover_page, f"{prefix}:fileNumber", PRIMARY_DOC_NAMESPACES
            ),
            explanatory_choice=(
                self._get_optional_text(
                    explanatory_info,
                    f"{prefix}:explanatoryChoice",
                    PRIMARY_DOC_NAMESPACES,
                )
                if explanatory_info is not None
                else None
            ),
            # NOTE(review): defaults to the string "0" (not None) when the
            # summary page is absent.
            other_included_managers_count=(
                self._get_optional_text(
                    summary_page,
                    f"{prefix}:otherIncludedManagersCount",
                    PRIMARY_DOC_NAMESPACES,
                )
                if summary_page is not None
                else "0"
            ),
            signer_name=self._get_required_text(
                signature_page, f"{prefix}:txSignature", PRIMARY_DOC_NAMESPACES
            ),
            signer_title=self._get_required_text(
                signature_page, f"{prefix}:txTitle", PRIMARY_DOC_NAMESPACES
            ),
            signature_date=self._get_required_text(
                signature_page, f"{prefix}:txAsOfDate", PRIMARY_DOC_NAMESPACES
            ),
            tx_printed_signature=self._get_optional_text(
                signature_page, f"{prefix}:txPrintedSignature", PRIMARY_DOC_NAMESPACES
            ),
            # Agent-for-service fields are all optional and guarded on the
            # presence of their parent elements resolved earlier.
            agent_for_service_name=(
                self._get_optional_text(
                    agent_for_service, f"{prefix}:name", PRIMARY_DOC_NAMESPACES
                )
                if agent_for_service is not None
                else None
            ),
            agent_for_service_address_street1=(
                self._get_optional_text(
                    agent_for_service_address, "com:street1", PRIMARY_DOC_NAMESPACES
                )
                if agent_for_service_address is not None
                else None
            ),
            agent_for_service_address_street2=(
                self._get_optional_text(
                    agent_for_service_address, "com:street2", PRIMARY_DOC_NAMESPACES
                )
                if agent_for_service_address is not None
                else None
            ),
            agent_for_service_address_city=(
                self._get_optional_text(
                    agent_for_service_address, "com:city", PRIMARY_DOC_NAMESPACES
                )
                if agent_for_service_address is not None
                else None
            ),
            agent_for_service_address_state_country=(
                self._get_optional_text(
                    agent_for_service_address,
                    "com:stateOrCountry",
                    PRIMARY_DOC_NAMESPACES,
                )
                if agent_for_service_address is not None
                else None
            ),
            agent_for_service_address_zip_code=(
                self._get_optional_text(
                    agent_for_service_address, "com:zipCode", PRIMARY_DOC_NAMESPACES
                )
                if agent_for_service_address is not None
                else None
            ),
            # None (unknown) when the submission type itself was absent.
            is_amendment=(
                is_amendment_submission if submission_type is not None else None
            ),
            amendment_no=(
                self._get_optional_text(
                    amendment_info, f"{prefix}:amendmentNo", PRIMARY_DOC_NAMESPACES
                )
                if amendment_info is not None
                else None
            ),
            amendment_type=(
                self._get_optional_text(
                    amendment_info, f"{prefix}:amendmentType", PRIMARY_DOC_NAMESPACES
                )
                if amendment_info is not None
                else None
            ),
            conf_denied_expired=(
                self._get_optional_text(
                    amendment_info,
                    f"{prefix}:confDeniedExpired",
                    PRIMARY_DOC_NAMESPACES,
                )
                if amendment_info is not None
                else None
            ),
            de_novo_request_choice=self._get_optional_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:deNovoRequestChoice",
                PRIMARY_DOC_NAMESPACES,
            ),
            year_or_quarter=self._get_optional_text(
                cover_page, f"{prefix}:yearOrQuarter", PRIMARY_DOC_NAMESPACES
            ),
            included_managers=included_managers,
            registrant_type=self._get_optional_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:registrantType",
                PRIMARY_DOC_NAMESPACES,
            ),
            live_test_flag=self._get_optional_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:liveTestFlag",
                PRIMARY_DOC_NAMESPACES,
            ),
            ccc=self._get_optional_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:filer/{prefix}:issuerCredentials/{prefix}:ccc",
                PRIMARY_DOC_NAMESPACES,
            ),
            contact_name=(
                self._get_optional_text(
                    contact, f"{prefix}:name", PRIMARY_DOC_NAMESPACES
                )
                if contact is not None
                else None
            ),
            contact_phone_number=(
                self._get_optional_text(
                    contact, f"{prefix}:phoneNumber", PRIMARY_DOC_NAMESPACES
                )
                if contact is not None
                else None
            ),
            contact_email_address=(
                self._get_optional_text(
                    contact, f"{prefix}:emailAddress", PRIMARY_DOC_NAMESPACES
                )
                if contact is not None
                else None
            ),
            override_internet_flag=self._get_optional_text(
                header_data, f"{prefix}:overrideInternetFlag", PRIMARY_DOC_NAMESPACES
            ),
            confirming_copy_flag=self._get_optional_text(
                header_data, f"{prefix}:confirmingCopyFlag", PRIMARY_DOC_NAMESPACES
            ),
            investment_company_type=self._get_optional_text(
                header_data,
                f"{prefix}:filerInfo/{prefix}:investmentCompanyType",
                PRIMARY_DOC_NAMESPACES,
            ),
            # Guarded double lookup: flag lives under
            # seriesClass/reportSeriesClass, both of which may be absent.
            rpt_include_all_series_flag=(
                self._get_optional_text(
                    series_class_section.find(
                        f"{prefix}:reportSeriesClass", PRIMARY_DOC_NAMESPACES
                    ),
                    f"{prefix}:rptIncludeAllSeriesFlag",
                    PRIMARY_DOC_NAMESPACES,
                )
                if series_class_section is not None
                and series_class_section.find(
                    f"{prefix}:reportSeriesClass", PRIMARY_DOC_NAMESPACES
                )
                is not None
                else None
            ),
            series_count=self._get_optional_text(
                series_page, f"{prefix}:seriesCount", PRIMARY_DOC_NAMESPACES
            ),
            report_series_class_infos=report_series_class_infos,
            series_reports=series_reports,
        )
# Define the namespace for easier access
# "inf" is the N-PX proxy-vote information table schema; used for every
# lookup inside <proxyTable> elements.
PROXY_VOTE_TABLE_NAMESPACES = {
    "inf": "http://www.sec.gov/edgar/document/npxproxy/informationtable"
}
class ProxyVoteTableExtractor(BaseExtractor):
    """
    Extracts proxy vote information from SEC N-PX proxy vote table XML data.

    Uses lxml.etree.iterparse so that large documents are consumed one
    <proxyTable> element at a time; each processed element is cleared and
    detached from its parent to keep memory bounded.
    """

    def __init__(self, xml_bytes: bytes):
        """Initialize the extractor with raw XML *bytes*."""
        super().__init__(xml_bytes)
        # iterparse filters on fully qualified (Clark-notation) tag names.
        ns_uri = PROXY_VOTE_TABLE_NAMESPACES["inf"]
        self.proxy_table_iter_tag = f"{{{ns_uri}}}proxyTable"

    def _collect_categories(self, table_elem: ET._Element) -> list:
        """Build the VoteCategory list from <voteCategories>, if present."""
        ns = PROXY_VOTE_TABLE_NAMESPACES
        result = []
        container = table_elem.find("inf:voteCategories", namespaces=ns)
        if container is None:
            return result
        for cat_elem in container.findall("inf:voteCategory", namespaces=ns):
            cat_type = self._get_optional_text(cat_elem, "inf:categoryType", ns)
            # Skip entries with missing/empty category text.
            if cat_type:
                result.append(VoteCategory(category_type=cat_type))
        return result

    def _collect_vote_records(self, table_elem: ET._Element) -> list:
        """Build the VoteRecord list from <vote>, keeping only complete rows."""
        ns = PROXY_VOTE_TABLE_NAMESPACES
        result = []
        container = table_elem.find("inf:vote", namespaces=ns)
        if container is None:
            return result
        for rec_elem in container.findall("inf:voteRecord", namespaces=ns):
            how = self._get_optional_text(rec_elem, "inf:howVoted", ns)
            shares = self._get_optional_float(rec_elem, "inf:sharesVoted", ns)
            recommendation = self._get_optional_text(
                rec_elem, "inf:managementRecommendation", ns
            )
            # A record is only kept when all three fields are present.
            if how is not None and shares is not None and recommendation is not None:
                result.append(
                    VoteRecord(
                        how_voted=how,
                        shares_voted=shares,
                        management_recommendation=recommendation,
                    )
                )
        return result

    def _collect_other_managers(self, table_elem: ET._Element) -> list:
        """Build the list of non-empty manager ids from <voteManager>."""
        ns = PROXY_VOTE_TABLE_NAMESPACES
        result = []
        manager_section = table_elem.find("inf:voteManager", namespaces=ns)
        if manager_section is None:
            return result
        for container in manager_section.findall("inf:otherManagers", namespaces=ns):
            for om_elem in container.findall("inf:otherManager", namespaces=ns):
                manager_id = om_elem.text.strip() if om_elem.text else None
                if manager_id:
                    result.append(manager_id)
        return result

    def _extract_proxy_table_generator(self) -> Generator[ProxyTable, None, None]:
        """
        Parses the XML and yields ProxyTable objects.

        Elements with missing required fields or invalid numerics are logged
        and skipped rather than aborting the whole parse.

        Yields:
            Generator[ProxyTable, None, None]: A generator of ProxyTable
            dataclass instances.
        """
        ns = PROXY_VOTE_TABLE_NAMESPACES
        context = ET.iterparse(
            io.BytesIO(self.xml_bytes),
            events=("end",),
            tag=self.proxy_table_iter_tag,
            recover=True,
        )
        for _, element in context:
            try:
                # Required fields first (raise ValueError on absence), then
                # the optional scalar fields and nested collections.
                yield ProxyTable(
                    issuer_name=self._get_required_text(
                        element, "inf:issuerName", ns
                    ),
                    meeting_date=self._get_required_text(
                        element, "inf:meetingDate", ns
                    ),
                    vote_description=self._get_required_text(
                        element, "inf:voteDescription", ns
                    ),
                    shares_voted=self._get_required_float(
                        element, "inf:sharesVoted", ns
                    ),
                    shares_on_loan=self._get_required_float(
                        element, "inf:sharesOnLoan", ns
                    ),
                    cusip=self._get_optional_text(element, "inf:cusip", ns),
                    isin=self._get_optional_text(element, "inf:isin", ns),
                    figi=self._get_optional_text(element, "inf:figi", ns),
                    other_vote_description=self._get_optional_text(
                        element, "inf:otherVoteDescription", ns
                    ),
                    vote_source=self._get_optional_text(
                        element, "inf:voteSource", ns
                    ),
                    vote_series=self._get_optional_text(
                        element, "inf:voteSeries", ns
                    ),
                    vote_other_info=self._get_optional_text(
                        element, "inf:voteOtherInfo", ns
                    ),
                    vote_categories=self._collect_categories(element),
                    vote_records=self._collect_vote_records(element),
                    other_managers=self._collect_other_managers(element),
                )
            except (ValueError, TypeError) as e:
                log.error(
                    "Skipping proxyTable due to missing/invalid data or parsing error: %s on element %s", e, element.tag if element is not None else 'Unknown Element'
                )
            # Release the processed subtree so memory use stays flat even on
            # very large filings.
            if element is not None:
                element.clear()
                parent = element.getparent()
                if parent is not None:
                    parent.remove(element)
        del context

    def extract(self) -> ProxyVoteTable:
        """
        Extracts all ProxyTable instances from the XML and returns them in a ProxyVoteTable container.
        This is the main public method for this extractor.
        """
        tables = list(self._extract_proxy_table_generator())
        return ProxyVoteTable(proxy_tables=tables)