Initial commit
This commit is contained in:
151
venv/lib/python3.10/site-packages/edgar/xmltools.py
Normal file
151
venv/lib/python3.10/site-packages/edgar/xmltools.py
Normal file
@@ -0,0 +1,151 @@
|
||||
from decimal import Decimal
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
__all__ = [
|
||||
'child_text',
|
||||
'child_value',
|
||||
'child_texts',
|
||||
'find_element',
|
||||
'get_footnote_ids',
|
||||
'optional_decimal',
|
||||
'value_or_footnote',
|
||||
'extract_child_text',
|
||||
'extract_child_value',
|
||||
'value_with_footnotes',
|
||||
]
|
||||
|
||||
|
||||
def find_element(
|
||||
xml_tag_or_string: Union[str, BeautifulSoup, Tag],
|
||||
element_name) -> Tag:
|
||||
"""
|
||||
Find the element with that name in the string or Tag
|
||||
:param xml_tag_or_string: either an exml tag or string containing xml
|
||||
:param element_name: The name of the element to find
|
||||
:return: An element
|
||||
"""
|
||||
if isinstance(xml_tag_or_string, Tag):
|
||||
return xml_tag_or_string.find(element_name)
|
||||
elif isinstance(xml_tag_or_string, str) and "<" in xml_tag_or_string:
|
||||
soup: BeautifulSoup = BeautifulSoup(xml_tag_or_string, features="xml")
|
||||
return soup.find(element_name)
|
||||
|
||||
|
||||
def get_footnote_ids(tag: Tag,
|
||||
sep: str = ',') -> str:
|
||||
"""Get the footnotes from the tag as a string"""
|
||||
return sep.join([
|
||||
el.attrs.get('id') for el in tag.find_all("footnoteId")
|
||||
])
|
||||
|
||||
|
||||
def value_with_footnotes(tag: Tag,
|
||||
footnote_sep: str = ",") -> str:
|
||||
"""Get the value from the tag, including footnotes if there are any
|
||||
Example: Given this xml
|
||||
<underlyingSecurityTitle>
|
||||
<value>Class B Common Stock</value>
|
||||
<footnoteId id="F2"/>
|
||||
<footnoteId id="F3"/>
|
||||
</underlyingSecurityTitle>
|
||||
|
||||
return "Class B Common Stock [F2,F3]"
|
||||
"""
|
||||
value_tag = tag.find('value')
|
||||
value = value_tag.text if value_tag else ""
|
||||
|
||||
footnote_ids = get_footnote_ids(tag, footnote_sep)
|
||||
footnote_str = f"[{footnote_ids}]" if footnote_ids else ""
|
||||
if value:
|
||||
return f"{value} {footnote_str}" if footnote_str else value
|
||||
return footnote_str
|
||||
|
||||
|
||||
def value_or_footnote(el: Tag) -> Optional[str]:
|
||||
value_el = el.find('value')
|
||||
if value_el:
|
||||
return value_el.text.strip()
|
||||
else:
|
||||
footnote = el.find('footnote')
|
||||
if not footnote:
|
||||
footnote = el.find("footnoteId")
|
||||
if footnote:
|
||||
return footnote.attrs['id']
|
||||
|
||||
|
||||
def child_text(parent: Tag,
|
||||
child: str) -> Optional[str]:
|
||||
"""
|
||||
Get the text of the child element if it exists or None
|
||||
:param parent: The parent tag
|
||||
:param child: The name of the child element
|
||||
:return: the text of the child element if it exists or None
|
||||
"""
|
||||
el = parent.find(child)
|
||||
if el:
|
||||
return el.text.strip()
|
||||
|
||||
|
||||
def child_value(parent: Tag,
|
||||
child: str,
|
||||
default_value: str = None) -> str:
|
||||
"""
|
||||
Get the text of the value tag within the child tag if it exists or None
|
||||
|
||||
:param parent: The parent tag
|
||||
:param child: The name of the child element
|
||||
:param default_value: The default value to return if the value is None
|
||||
:return: the text of the child element if it exists or None
|
||||
"""
|
||||
el = parent.find(child)
|
||||
if el:
|
||||
return value_with_footnotes(el)
|
||||
return default_value
|
||||
|
||||
|
||||
def child_texts(parent: Tag,
|
||||
child: str) -> List[str]:
|
||||
"""
|
||||
Get the text of the value tag within the child tag if it exists or None
|
||||
|
||||
:param parent: The parent tag
|
||||
:param child: The name of the child element
|
||||
:return: the text of the child element if it exists or None
|
||||
"""
|
||||
return [el.text for el in parent.find_all(child)]
|
||||
|
||||
|
||||
def optional_decimal(parent: Tag,
|
||||
child: str) -> Optional[Decimal]:
|
||||
text = child_text(parent, child)
|
||||
if text:
|
||||
if text == "N/A":
|
||||
return None
|
||||
return Decimal(text)
|
||||
|
||||
|
||||
def extract_child_text(tag: Tag,
|
||||
key: str,
|
||||
child_tag_name: str) -> Tuple[str, str]:
|
||||
"""Get the child text from the tag and return a Tuple (key, child_value)
|
||||
Useful for populating dicts
|
||||
|
||||
:param tag The element
|
||||
:param key The dict key to use to pupulate the dict or DataFrame
|
||||
:param child_tag_name The child tag name
|
||||
"""
|
||||
return key, child_text(tag, child_tag_name)
|
||||
|
||||
|
||||
def extract_child_value(tag: Tag,
|
||||
key: str,
|
||||
child_tag_name: str) -> Tuple[str, str]:
|
||||
"""Get the child value from the tag and return a Tuple (key, child_value)
|
||||
Useful for populating dicts
|
||||
:param tag The element
|
||||
:param key The dict key to use to pupulate the dict or DataFrame
|
||||
:param child_tag_name The child tag name
|
||||
"""
|
||||
return key, child_value(tag, child_tag_name)
|
||||
Reference in New Issue
Block a user