# edgartools/venv/lib/python3.10/site-packages/edgar/npx/parsing.py

import io
import logging
from pathlib import Path
from typing import Any, Dict, Generator, Optional

from lxml import etree as ET

from .data import (
    ClassInfo,
    IncludedManager,
    PrimaryDoc,
    ProxyTable,
    ProxyVoteTable,
    ReportSeriesClassInfo,
    SeriesReport,
    VoteCategory,
    VoteRecord,
)

log = logging.getLogger(__name__)


class BaseExtractor:
    """Base class for XML extractors.

    Parses the raw XML bytes once with a recovering lxml parser and provides
    small helpers for reading stripped text and float values from child
    elements located by an ElementPath expression.
    """

    def __init__(self, xml_bytes: bytes):
        """Store the raw XML bytes and parse them into ``self.root``.

        Args:
            xml_bytes: The complete XML document.

        Raises:
            ValueError: If the document cannot be parsed, or if the recovering
                parser yields no root element.
        """
        self.xml_bytes = xml_bytes
        try:
            # Use a recovering parser for robustness against minor XML issues.
            parser = ET.XMLParser(recover=True)
            self.root: Optional["ET._Element"] = ET.fromstring(
                xml_bytes, parser=parser
            )
        except ET.ParseError as e:
            raise ValueError(f"Error parsing XML bytes during initial check: {e}") from e

        if self.root is None:
            # Severely malformed input can leave the recovering parser with no
            # root at all. Raise ValueError directly: lxml's ParseError cannot
            # be built from a message alone (it also requires code, line and
            # column), so raising it here would surface as a TypeError.
            raise ValueError(
                "Error parsing XML bytes during initial check: "
                "Failed to parse XML: root element is None even after recovery."
            )

    @classmethod
    def from_file(cls, xml_file_path: Path) -> "BaseExtractor":
        """Create an extractor instance from the XML file at *xml_file_path*.

        Args:
            xml_file_path: Location of the XML file (a ``Path``; plain string
                paths are accepted as well).

        Raises:
            FileNotFoundError: If the path does not exist.
            ValueError: If the file content cannot be parsed.
        """
        path = Path(xml_file_path)
        if not path.exists():
            raise FileNotFoundError(f"XML file not found: {xml_file_path}")
        return cls(path.read_bytes())

    def extract(self) -> Any:
        """Main extraction method to be implemented by subclasses."""
        raise NotImplementedError("Subclasses must implement the extract method.")

    def _get_optional_text(
        self,
        element: Optional["ET._Element"],
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> Optional[str]:
        """Return the stripped text of the first match of *xpath*, or None.

        None is returned when *element* is None, when nothing matches, or when
        the matched element has no text or only whitespace.
        """
        if element is None:
            return None
        # find() is always given a mapping; fall back to an empty one.
        ns = namespaces if namespaces is not None else {}
        found_element = element.find(xpath, namespaces=ns)
        if found_element is None or not found_element.text:
            return None
        # Whitespace-only text strips to "", which counts as empty -> None.
        return found_element.text.strip() or None

    def _get_required_text(
        self,
        element: "ET._Element",
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> str:
        """Return the stripped text at *xpath*.

        Raises:
            ValueError: If the element is missing or its text is empty. The
                message includes the serialized *element* for diagnosis.
        """
        text = self._get_optional_text(element, xpath, namespaces)
        if text is None:
            element_str = ET.tostring(element, pretty_print=True).decode()
            raise ValueError(
                f"Required text not found for xpath: {xpath} in element: {element_str}"
            )
        return text

    def _get_optional_float(
        self,
        element: Optional["ET._Element"],
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> Optional[float]:
        """Return the text at *xpath* converted to float, or None.

        None is returned when the text is missing/empty or is not accepted by
        ``float()``.
        """
        text = self._get_optional_text(element, xpath, namespaces)
        if text is None:
            return None
        try:
            return float(text)
        except ValueError:
            return None

    def _get_required_float(
        self,
        element: "ET._Element",
        xpath: str,
        namespaces: Optional[Dict[str, str]] = None,
    ) -> float:
        """Return the text at *xpath* converted to float.

        Raises:
            ValueError: If the value is missing or not a valid float. The
                message includes the serialized *element* for diagnosis.
        """
        val = self._get_optional_float(element, xpath, namespaces)
        if val is None:
            element_str = ET.tostring(element, pretty_print=True).decode()
            raise ValueError(
                f"Required float not found or invalid for xpath: {xpath} in element: {element_str}"
            )
        return val


# Prefix -> namespace URI map used when resolving paths in primary_doc.xml.
PRIMARY_DOC_NAMESPACES: Dict[str, str] = dict(
    npx="http://www.sec.gov/edgar/npx",
    com="http://www.sec.gov/edgar/common",
)


class PrimaryDocExtractor(BaseExtractor):
    """
    Extracts data from primary_doc.xml bytes into a PrimaryDoc dataclass.
    Handles different XML formats including standard N-PX and amendment filings.

    Element paths are built from ``self.doc_prefix`` and resolved against
    PRIMARY_DOC_NAMESPACES; address fields live in the ``com`` namespace.
    """

    def __init__(self, xml_bytes: bytes):
        """Initialize the extractor with raw XML *bytes*."""
        super().__init__(xml_bytes)
        # Namespace prefix used when building element paths in extract().
        self.doc_prefix = "npx"

    def _get_submission_type(self) -> Optional[str]:
        """Extract the submission_type from the XML to determine filing type."""
        npx_prefix = "npx"
        path = f"{npx_prefix}:headerData/{npx_prefix}:submissionType"
        return self._get_optional_text(self.root, path, PRIMARY_DOC_NAMESPACES)

    def _npx_path(self, *tags: str) -> str:
        """Join *tags* into a prefixed path, e.g. ``npx:filerInfo/npx:filer``."""
        return "/".join(f"{self.doc_prefix}:{tag}" for tag in tags)

    def _npx_find(self, parent: "ET._Element", *tags: str) -> Optional["ET._Element"]:
        """Return the first element under *parent* matching the prefixed path, or None."""
        return parent.find(self._npx_path(*tags), PRIMARY_DOC_NAMESPACES)

    def _npx_text(self, element: Optional["ET._Element"], *tags: str) -> Optional[str]:
        """Optional text at the prefixed path; None when *element* is None or text is absent."""
        return self._get_optional_text(
            element, self._npx_path(*tags), PRIMARY_DOC_NAMESPACES
        )

    def _npx_required_text(self, element: "ET._Element", *tags: str) -> str:
        """Required text at the prefixed path; raises ValueError when absent."""
        return self._get_required_text(
            element, self._npx_path(*tags), PRIMARY_DOC_NAMESPACES
        )

    def _extract_included_managers(self, summary_page: Optional["ET._Element"]) -> list:
        """Build IncludedManager entries from summaryPage/otherManagers2/investmentManagers."""
        if summary_page is None:
            return []
        other_managers_section = self._npx_find(summary_page, "otherManagers2")
        if other_managers_section is None:
            return []
        # Each <investmentManagers> child of <otherManagers2> describes one manager.
        return [
            IncludedManager(
                serial_no=self._npx_required_text(manager_elem, "serialNo"),
                form13f_file_number=self._npx_text(manager_elem, "form13FFileNumber"),
                name=self._npx_required_text(manager_elem, "name"),
                sec_file_number=self._npx_text(manager_elem, "secFileNumber"),
            )
            for manager_elem in other_managers_section.findall(
                self._npx_path("investmentManagers"), PRIMARY_DOC_NAMESPACES
            )
        ]

    def _extract_report_series_class_infos(
        self, report_series_class: Optional["ET._Element"]
    ) -> list:
        """Build ReportSeriesClassInfo entries from a <reportSeriesClass> element."""
        if report_series_class is None:
            return []
        infos = []
        for info_elem in report_series_class.findall(
            self._npx_path("rptSeriesClassInfo"), PRIMARY_DOC_NAMESPACES
        ):
            series_id = self._npx_required_text(info_elem, "seriesId")
            class_infos = [
                ClassInfo(class_id=self._npx_required_text(class_elem, "classId"))
                for class_elem in info_elem.findall(
                    self._npx_path("classInfo"), PRIMARY_DOC_NAMESPACES
                )
            ]
            infos.append(
                ReportSeriesClassInfo(series_id=series_id, class_infos=class_infos)
            )
        return infos

    def _extract_series_reports(self, series_page: Optional["ET._Element"]) -> list:
        """Build SeriesReport entries from seriesPage/seriesDetails/seriesReports."""
        if series_page is None:
            return []
        series_details = self._npx_find(series_page, "seriesDetails")
        if series_details is None:
            return []
        return [
            SeriesReport(
                id_of_series=self._npx_required_text(report_elem, "idOfSeries"),
                name_of_series=self._npx_text(report_elem, "nameOfSeries"),
                lei_of_series=self._npx_text(report_elem, "leiOfSeries"),
            )
            for report_elem in series_details.findall(
                self._npx_path("seriesReports"), PRIMARY_DOC_NAMESPACES
            )
        ]

    def extract(self) -> PrimaryDoc:
        """
        Parses the XML and populates the PrimaryDoc dataclass.
        Handles both standard N-PX filings and amendment filings with different structures.

        Raises:
            ValueError: If the root was not parsed, if a required structural
                element (headerData, formData, coverPage, reportingPerson, its
                address, signaturePage) is missing, or if a required text
                field is missing or empty.
        """
        if self.root is None:
            raise ValueError("XML root not parsed. Cannot extract.")

        ns = PRIMARY_DOC_NAMESPACES
        prefix = self.doc_prefix
        submission_type = self._get_submission_type()

        # --- Required structural elements -------------------------------
        header_data = self._npx_find(self.root, "headerData")
        if header_data is None:
            raise ValueError("Required <headerData> element not found in XML.")

        form_data = self._npx_find(self.root, "formData")
        if form_data is None:
            raise ValueError("Required <formData> element not found in XML.")

        cover_page = self._npx_find(form_data, "coverPage")
        if cover_page is None:
            raise ValueError(
                f"Required <coverPage> element not found in XML using prefix {prefix}."
            )

        reporting_person = self._npx_find(cover_page, "reportingPerson")
        if reporting_person is None:
            raise ValueError(
                "Required <reportingPerson> element not found in <coverPage>."
            )

        reporting_person_address = self._npx_find(reporting_person, "address")
        if reporting_person_address is None:
            raise ValueError(
                "Required <address> element not found in <reportingPerson>."
            )

        agent_for_service = self._npx_find(cover_page, "agentForService")
        agent_for_service_address = (
            self._npx_find(agent_for_service, "address")
            if agent_for_service is not None
            else None
        )

        signature_page = self._npx_find(form_data, "signaturePage")
        if signature_page is None:
            raise ValueError(
                f"Required <signaturePage> element not found in XML using prefix {prefix}."
            )

        # --- Optional sections (None when absent) -----------------------
        summary_page = self._npx_find(form_data, "summaryPage")
        series_page = self._npx_find(form_data, "seriesPage")
        report_info = self._npx_find(cover_page, "reportInfo")
        explanatory_info = self._npx_find(cover_page, "explanatoryInformation")
        amendment_info = self._npx_find(cover_page, "amendmentInfo")
        contact = self._npx_find(header_data, "contact")
        series_class_section = self._npx_find(header_data, "seriesClass")
        # Looked up once and shared by the class-info list and the
        # rptIncludeAllSeriesFlag field below.
        report_series_class = (
            self._npx_find(series_class_section, "reportSeriesClass")
            if series_class_section is not None
            else None
        )

        # --- Repeating sections -----------------------------------------
        included_managers = self._extract_included_managers(summary_page)
        report_series_class_infos = self._extract_report_series_class_infos(
            report_series_class
        )
        series_reports = self._extract_series_reports(series_page)

        # The optional-text helpers return None for a None element, so optional
        # sections can be passed through without explicit None guards.
        return PrimaryDoc(
            cik=self._npx_required_text(
                header_data, "filerInfo", "filer", "issuerCredentials", "cik"
            ),
            submission_type=submission_type or "",
            period_of_report=self._npx_required_text(
                header_data, "filerInfo", "periodOfReport"
            ),
            fund_name=self._npx_required_text(reporting_person, "name"),
            phone_number=self._npx_text(reporting_person, "phoneNumber"),
            street1=self._get_required_text(
                reporting_person_address, "com:street1", ns
            ),
            street2=self._get_optional_text(
                reporting_person_address, "com:street2", ns
            ),
            city=self._get_required_text(reporting_person_address, "com:city", ns),
            state=self._get_required_text(
                reporting_person_address, "com:stateOrCountry", ns
            ),
            zip_code=self._get_required_text(
                reporting_person_address, "com:zipCode", ns
            ),
            # Several fields appear under alternative tag names; the first
            # non-empty one wins.
            crd_number=self._npx_text(cover_page, "reportingCrdNumber")
            or self._npx_text(cover_page, "crdNumber"),
            filer_sec_file_number=self._npx_text(cover_page, "reportingSecFileNumber")
            or self._npx_text(cover_page, "filerSecFileNumber")
            or self._npx_text(header_data, "filerInfo", "filer", "fileNumber"),
            lei_number=self._npx_text(cover_page, "lei")
            or self._npx_text(cover_page, "leiNumber"),
            report_calendar_year=self._npx_required_text(
                cover_page, "reportCalendarYear"
            ),
            # reportType is only mandatory when <reportInfo> itself is present.
            report_type=(
                self._npx_required_text(report_info, "reportType")
                if report_info is not None
                else None
            ),
            confidential_treatment=self._npx_text(report_info, "confidentialTreatment"),
            notice_explanation=self._npx_text(explanatory_info, "noticeExplanation"),
            npx_file_number=self._npx_text(cover_page, "fileNumber"),
            explanatory_choice=self._npx_text(explanatory_info, "explanatoryChoice"),
            # "0" only when there is no summary page at all; a summary page
            # without the count element yields None.
            other_included_managers_count=(
                self._npx_text(summary_page, "otherIncludedManagersCount")
                if summary_page is not None
                else "0"
            ),
            signer_name=self._npx_required_text(signature_page, "txSignature"),
            signer_title=self._npx_required_text(signature_page, "txTitle"),
            signature_date=self._npx_required_text(signature_page, "txAsOfDate"),
            tx_printed_signature=self._npx_text(signature_page, "txPrintedSignature"),
            agent_for_service_name=self._npx_text(agent_for_service, "name"),
            agent_for_service_address_street1=self._get_optional_text(
                agent_for_service_address, "com:street1", ns
            ),
            agent_for_service_address_street2=self._get_optional_text(
                agent_for_service_address, "com:street2", ns
            ),
            agent_for_service_address_city=self._get_optional_text(
                agent_for_service_address, "com:city", ns
            ),
            agent_for_service_address_state_country=self._get_optional_text(
                agent_for_service_address, "com:stateOrCountry", ns
            ),
            agent_for_service_address_zip_code=self._get_optional_text(
                agent_for_service_address, "com:zipCode", ns
            ),
            # A submission type containing "/A" marks an amendment; unknown
            # (None) when the submission type itself is missing.
            is_amendment=(
                "/A" in submission_type if submission_type is not None else None
            ),
            amendment_no=self._npx_text(amendment_info, "amendmentNo"),
            amendment_type=self._npx_text(amendment_info, "amendmentType"),
            conf_denied_expired=self._npx_text(amendment_info, "confDeniedExpired"),
            de_novo_request_choice=self._npx_text(
                header_data, "filerInfo", "deNovoRequestChoice"
            ),
            year_or_quarter=self._npx_text(cover_page, "yearOrQuarter"),
            included_managers=included_managers,
            registrant_type=self._npx_text(header_data, "filerInfo", "registrantType"),
            live_test_flag=self._npx_text(header_data, "filerInfo", "liveTestFlag"),
            ccc=self._npx_text(
                header_data, "filerInfo", "filer", "issuerCredentials", "ccc"
            ),
            contact_name=self._npx_text(contact, "name"),
            contact_phone_number=self._npx_text(contact, "phoneNumber"),
            contact_email_address=self._npx_text(contact, "emailAddress"),
            override_internet_flag=self._npx_text(header_data, "overrideInternetFlag"),
            confirming_copy_flag=self._npx_text(header_data, "confirmingCopyFlag"),
            investment_company_type=self._npx_text(
                header_data, "filerInfo", "investmentCompanyType"
            ),
            rpt_include_all_series_flag=self._npx_text(
                report_series_class, "rptIncludeAllSeriesFlag"
            ),
            series_count=self._npx_text(series_page, "seriesCount"),
            report_series_class_infos=report_series_class_infos,
            series_reports=series_reports,
        )


# Prefix -> namespace URI map for the N-PX proxy vote information table XML.
PROXY_VOTE_TABLE_NAMESPACES = dict(
    inf="http://www.sec.gov/edgar/document/npxproxy/informationtable",
)


class ProxyVoteTableExtractor(BaseExtractor):
    """
    Extracts proxy vote information from SEC N-PX proxy vote table XML data.

    Records are read with lxml.etree.iterparse, one <proxyTable> element at a
    time, and each element is released once it has been converted.

    NOTE: BaseExtractor.__init__ has already parsed the whole document into
    ``self.root`` by the time iteration starts, so the full tree is held in
    memory in addition to the incremental pass.
    """

    def __init__(self, xml_bytes: bytes):
        """Initialize the extractor with raw XML *bytes*."""
        super().__init__(xml_bytes)
        # The tag for iterparse should be the fully qualified name of the proxyTable element.
        self.proxy_table_iter_tag = (
            f"{{{PROXY_VOTE_TABLE_NAMESPACES['inf']}}}proxyTable"
        )

    @staticmethod
    def _release_element(element: "ET._Element") -> None:
        """Free a processed element and the siblings that precede it.

        The current element is emptied but left attached, and only siblings
        the parser has already finished with are deleted. lxml advises against
        discarding the element currently being delivered by iterparse.
        """
        element.clear()
        parent = element.getparent()
        if parent is None:
            return
        while element.getprevious() is not None:
            del parent[0]

    def _parse_proxy_table(self, element: "ET._Element") -> ProxyTable:
        """Convert one <proxyTable> element into a ProxyTable.

        Raises:
            ValueError: If a required field (issuerName, meetingDate,
                voteDescription, sharesVoted, sharesOnLoan) is missing, empty
                or, for the share counts, not a valid float.
        """
        ns = PROXY_VOTE_TABLE_NAMESPACES

        # Required fields first so an incomplete record fails fast.
        issuer_name = self._get_required_text(element, "inf:issuerName", ns)
        meeting_date = self._get_required_text(element, "inf:meetingDate", ns)
        vote_description = self._get_required_text(element, "inf:voteDescription", ns)
        shares_voted = self._get_required_float(element, "inf:sharesVoted", ns)
        shares_on_loan = self._get_required_float(element, "inf:sharesOnLoan", ns)

        cusip = self._get_optional_text(element, "inf:cusip", ns)
        isin = self._get_optional_text(element, "inf:isin", ns)
        figi = self._get_optional_text(element, "inf:figi", ns)
        other_vote_description = self._get_optional_text(
            element, "inf:otherVoteDescription", ns
        )
        vote_source = self._get_optional_text(element, "inf:voteSource", ns)
        vote_series = self._get_optional_text(element, "inf:voteSeries", ns)
        vote_other_info = self._get_optional_text(element, "inf:voteOtherInfo", ns)

        # Categories without a categoryType value are skipped.
        vote_categories = []
        categories_elem = element.find("inf:voteCategories", namespaces=ns)
        if categories_elem is not None:
            for cat_elem in categories_elem.findall("inf:voteCategory", namespaces=ns):
                category_type = self._get_optional_text(
                    cat_elem, "inf:categoryType", ns
                )
                if category_type:
                    vote_categories.append(VoteCategory(category_type=category_type))

        # A vote record is kept only when all three of its fields are present.
        vote_records = []
        vote_elem = element.find("inf:vote", namespaces=ns)
        if vote_elem is not None:
            for rec_elem in vote_elem.findall("inf:voteRecord", namespaces=ns):
                how_voted = self._get_optional_text(rec_elem, "inf:howVoted", ns)
                rec_shares = self._get_optional_float(rec_elem, "inf:sharesVoted", ns)
                mgmt_rec = self._get_optional_text(
                    rec_elem, "inf:managementRecommendation", ns
                )
                if how_voted is None or rec_shares is None or mgmt_rec is None:
                    # Make the drop visible instead of discarding silently.
                    log.debug(
                        "Dropping incomplete voteRecord for issuer %r "
                        "(howVoted=%r, sharesVoted=%r, managementRecommendation=%r)",
                        issuer_name,
                        how_voted,
                        rec_shares,
                        mgmt_rec,
                    )
                    continue
                vote_records.append(
                    VoteRecord(
                        how_voted=how_voted,
                        shares_voted=rec_shares,
                        management_recommendation=mgmt_rec,
                    )
                )

        # Manager identifiers are the text of voteManager/otherManagers/otherManager.
        other_managers = []
        vote_manager_elem = element.find("inf:voteManager", namespaces=ns)
        if vote_manager_elem is not None:
            for container in vote_manager_elem.findall(
                "inf:otherManagers", namespaces=ns
            ):
                for om_elem in container.findall("inf:otherManager", namespaces=ns):
                    manager_id = om_elem.text.strip() if om_elem.text else None
                    if manager_id:
                        other_managers.append(manager_id)

        return ProxyTable(
            issuer_name=issuer_name,
            meeting_date=meeting_date,
            vote_description=vote_description,
            shares_voted=shares_voted,
            shares_on_loan=shares_on_loan,
            cusip=cusip,
            isin=isin,
            figi=figi,
            other_vote_description=other_vote_description,
            vote_source=vote_source,
            vote_series=vote_series,
            vote_other_info=vote_other_info,
            vote_categories=vote_categories,
            vote_records=vote_records,
            other_managers=other_managers,
        )

    def _extract_proxy_table_generator(self) -> Generator[ProxyTable, None, None]:
        """
        Parses the XML and yields ProxyTable objects.

        Elements that fail conversion (missing or invalid required data) are
        logged at error level and skipped; iteration continues with the next
        element.

        Yields:
            Generator[ProxyTable, None, None]: A generator of ProxyTable dataclass instances.
        """
        context = ET.iterparse(
            io.BytesIO(self.xml_bytes),
            events=("end",),
            tag=self.proxy_table_iter_tag,
            recover=True,
        )

        for _, element in context:
            # Only the conversion is guarded; the yield stays outside the try
            # so exceptions thrown into the generator are never swallowed.
            try:
                proxy_table = self._parse_proxy_table(element)
            except (ValueError, TypeError) as e:
                log.error(
                    "Skipping proxyTable due to missing/invalid data or parsing error: %s on element %s",
                    e,
                    element.tag,
                )
                proxy_table = None
            finally:
                # The ProxyTable holds plain values only, so the XML element
                # can be released before the record is handed out.
                self._release_element(element)

            if proxy_table is not None:
                yield proxy_table

        del context

    def extract(self) -> ProxyVoteTable:
        """
        Extracts all ProxyTable instances from the XML and returns them in a ProxyVoteTable container.
        This is the main public method for this extractor.
        """
        return ProxyVoteTable(proxy_tables=list(self._extract_proxy_table_generator()))