Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,263 @@
"""
EdgarTools AI: AI and LLM integration for SEC financial data analysis.
This package provides AI capabilities for EdgarTools including:
- AI Skills: Portable documentation packages for Claude Desktop and other AI tools
- AI-optimized text methods (.text()) with research-backed formats (Markdown-KV, TSV)
- LLM context generation with token optimization
- Model Context Protocol (MCP) server for Claude Desktop integration
- Semantic enrichment of financial data
- Token counting and optimization
Installation:
pip install edgartools[ai]
Dependencies included:
- mcp: Model Context Protocol server support
- tiktoken: Token counting and optimization
Skills API:
>>> from edgar.ai import install_skill, package_skill
>>>
>>> # Install skill to ~/.claude/skills/
>>> install_skill()
PosixPath('/Users/username/.claude/skills/edgartools')
>>>
>>> # Create ZIP for Claude Desktop upload
>>> package_skill()
PosixPath('edgartools.zip')
>>> # List available skills
>>> from edgar.ai import list_skills
>>> skills = list_skills()
AI-Optimized Objects:
>>> from edgar import Company
>>> company = Company("AAPL")
>>>
>>> # Get AI-optimized text representation (Markdown-KV format)
>>> text = company.text(max_tokens=2000)
>>> print(text)
**Company:** Apple Inc.
**CIK:** 0000320193
**Ticker:** AAPL
Context Generation:
>>> from edgar.ai import enhance_financial_fact_llm_context
>>> context = enhance_financial_fact_llm_context(fact, detail_level='detailed')
"""
# Probe the optional AI dependencies and remember which ones are absent.
MISSING_DEPS = []

try:
    import mcp
except ImportError:
    MCP_AVAILABLE = False
    MISSING_DEPS.append("mcp")
else:
    MCP_AVAILABLE = True

try:
    import tiktoken
except ImportError:
    TIKTOKEN_AVAILABLE = False
    MISSING_DEPS.append("tiktoken")
else:
    TIKTOKEN_AVAILABLE = True

# Either optional dependency being importable is enough to flag AI as available.
AI_AVAILABLE = MCP_AVAILABLE or TIKTOKEN_AVAILABLE
# Core functionality (always available)
from edgar.ai.core import AIEnabled, SemanticEnricher, TokenOptimizer, check_ai_capabilities, enhance_financial_fact_llm_context
# Skills infrastructure (always available)
from edgar.ai.skills.base import BaseSkill
from edgar.ai.skills import list_skills, get_skill
from edgar.ai.skills.core import edgartools_skill
from edgar.ai.exporters import export_skill
# Convenience functions for common workflows
def install_skill(skill=None, to=None, quiet=False):
    """
    Export a skill in the "claude-skills" format and return where it landed.

    Thin convenience wrapper around ``export_skill``.

    Args:
        skill: Skill to install; ``edgartools_skill`` is used when None.
        to: Custom output directory, forwarded as ``output_dir``. When None,
            the exporter is called with ``install=True`` and picks its own
            default location.
        quiet: If True, print nothing (default: False).

    Returns:
        Whatever ``export_skill`` returns (the path of the exported skill).

    Examples:
        >>> from edgar.ai import install_skill
        >>> path = install_skill(quiet=True)
        >>> path = install_skill(to="./my-skills", quiet=True)
    """
    chosen = edgartools_skill if skill is None else skill
    verbose = not quiet
    rule = "=" * 60

    if verbose:
        # Banner is emitted exactly as a sequence of separate print calls.
        print("\n" + rule)
        print("""
___ _ _____ _
| __|__| |__ _ __ _ _ _ |_ _|__ ___ | |___
| _|/ _` / _` / _` | '_| | |/ _ \\/ _ \\| (_-<
|___\\__,_\\__, \\__,_|_| |_|\\___/\\___/|_/__/
|___/
""")
        print(rule)
        print(f"✨ Installing {chosen.name} skill...")
        print()

    # The install flag is only set when the caller did not supply a directory.
    installed_path = export_skill(
        chosen,
        format="claude-skills",
        output_dir=to,
        install=to is None,
    )

    if verbose:
        print(f"📁 Installed to: {installed_path}")
        print("✅ Ready to use in Claude Desktop and Claude Code!")
        print(rule + "\n")

    return installed_path
def package_skill(skill=None, output=None, quiet=False):
    """
    Export a skill in the "claude-desktop" format as a ZIP archive.

    Thin convenience wrapper around ``export_skill`` with ``create_zip=True``.

    Args:
        skill: Skill to package; ``edgartools_skill`` is used when None.
        output: Output directory, forwarded as ``output_dir`` (the exporter
            chooses its default when None).
        quiet: If True, print nothing (default: False).

    Returns:
        Whatever ``export_skill`` returns (the path of the created archive);
        its ``.name`` and ``.parent`` are shown in the summary output.

    Examples:
        >>> from edgar.ai import package_skill
        >>> archive = package_skill(quiet=True)
    """
    chosen = edgartools_skill if skill is None else skill
    verbose = not quiet
    rule = "=" * 60

    if verbose:
        print("\n" + rule)
        print("""
___ _ _____ _
| __|__| |__ _ __ _ _ _ |_ _|__ ___ | |___
| _|/ _` / _` / _` | '_| | |/ _ \\/ _ \\| (_-<
|___\\__,_\\__, \\__,_|_| |_|\\___/\\___/|_/__/
|___/
""")
        print(rule)
        print(f"📦 Packaging {chosen.name} skill as ZIP...")
        print()

    archive = export_skill(
        chosen,
        format="claude-desktop",
        output_dir=output,
        create_zip=True,
    )

    if verbose:
        print(f"✅ Created: {archive.name}")
        print(f"📍 Location: {archive.parent}")
        print("💡 Ready to upload via Claude Desktop's skill upload interface!")
        print(rule + "\n")

    return archive
# Optional MCP functionality
# Note: The class-based MCPServer and EdgarToolsServer are deprecated.
# Use the function-based API instead: from edgar.ai.mcp import main, test_server
if not MCP_AVAILABLE:
    # Without the mcp package both names resolve to one callable that always
    # fails with an installation hint.
    def MCPServer(*args, **kwargs):
        raise ImportError(
            "MCP support requires additional dependencies. "
            "Install with: pip install edgartools[ai]"
        )

    EdgarToolsServer = MCPServer
else:
    # Backward-compatibility stubs: the names still exist, but constructing
    # either class raises immediately and points at the replacement API.
    class MCPServer:
        def __init__(self, *args, **kwargs):
            raise DeprecationWarning(
                "MCPServer class is deprecated. "
                "Use function-based API: from edgar.ai.mcp import main, test_server"
            )

    class EdgarToolsServer:
        def __init__(self, *args, **kwargs):
            raise DeprecationWarning(
                "EdgarToolsServer class is deprecated. "
                "Use function-based API: from edgar.ai.mcp import main, test_server"
            )
# Public API
# Names exported by `from edgar.ai import *`. get_ai_info is listed because it
# is a public helper defined in this module and imported by the example script.
__all__ = [
    # Core
    "AIEnabled",
    "TokenOptimizer",
    "SemanticEnricher",
    "enhance_financial_fact_llm_context",
    "check_ai_capabilities",
    # Skills
    "BaseSkill",
    "list_skills",
    "get_skill",
    "edgartools_skill",
    "export_skill",
    # Convenience functions (delightful API)
    "install_skill",
    "package_skill",
    "get_ai_info",
    # MCP
    "MCPServer",
    "EdgarToolsServer",
    # Status flags
    "AI_AVAILABLE",
    "MCP_AVAILABLE",
    "TIKTOKEN_AVAILABLE",
    "MISSING_DEPS",
]
def get_ai_info():
    """
    Summarize the module-level AI dependency flags in one dictionary.

    Returns:
        dict with the availability flags, the list of missing dependencies,
        and an install command (None when nothing is missing).
    """
    install_hint = "pip install edgartools[ai]" if MISSING_DEPS else None
    return dict(
        ai_available=AI_AVAILABLE,
        mcp_available=MCP_AVAILABLE,
        tiktoken_available=TIKTOKEN_AVAILABLE,
        missing_dependencies=MISSING_DEPS,
        install_command=install_hint,
    )

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env python3
"""
EdgarTools MCP Server Entry Point
Enables running the server via: python -m edgar.ai
"""
if __name__ == "__main__":
    import sys

    from edgar.ai.mcp import main, test_server

    # A --test / -t flag anywhere on the command line runs the self-test and
    # exits with its result instead of starting the server.
    wants_self_test = any(flag in sys.argv for flag in ("--test", "-t"))
    if not wants_self_test:
        main()
    else:
        sys.exit(0 if test_server() else 1)

View File

@@ -0,0 +1,391 @@
"""
AI enhancements for EdgarTools entity models.
This module provides enhanced AI capabilities building on the existing
to_llm_context() implementation, adding token optimization, semantic
enrichment, and MCP compatibility.
"""
import json
from abc import ABC, abstractmethod
from datetime import date
from typing import Any, Dict, List, Optional, Union
class TokenOptimizer:
    """Utilities for optimizing content for LLM token limits."""

    @staticmethod
    def estimate_tokens(content: Union[str, dict]) -> int:
        """
        Estimate token count for content.

        Rough estimation: ~4 characters per token for English text.

        Args:
            content: A string, or a dict that is serialized to JSON first.

        Returns:
            Character count divided by four, rounded down.
        """
        if isinstance(content, dict):
            # default=str keeps the estimate from crashing on values json
            # cannot encode natively (dates, enums, ...); only the length of
            # the serialized text matters here.
            content = json.dumps(content, default=str)
        return len(content) // 4

    @staticmethod
    def optimize_for_tokens(content: Dict[str, Any], max_tokens: int) -> Dict[str, Any]:
        """
        Optimize content to fit within token limit.

        Content already within the limit is returned unchanged (same object).
        Otherwise a new dict is built from the priority keys, in order, and
        building stops at the first key that would push the estimate over
        ``max_tokens``.

        Args:
            content: Context dictionary to shrink.
            max_tokens: Estimated-token budget.

        Returns:
            The original dict, or a reduced dict carrying ``'_truncated': True``
            when keys were dropped. The marker is added after the budget
            check, so the result may exceed the budget by the marker's size.
        """
        if TokenOptimizer.estimate_tokens(content) <= max_tokens:
            return content

        # Priority order for content retention (most important first).
        priority_keys = [
            'concept', 'value', 'period', 'context',
            'quality', 'confidence', 'source'
        ]

        optimized: Dict[str, Any] = {}
        for key in priority_keys:
            if key not in content:
                continue
            optimized[key] = content[key]
            if TokenOptimizer.estimate_tokens(optimized) > max_tokens:
                # The key that overflowed is removed again and nothing of
                # lower priority is attempted.
                optimized.pop(key)
                break

        # Flag the result when anything from the original was left out.
        if len(optimized) < len(content):
            optimized['_truncated'] = True
        return optimized
class SemanticEnricher:
    """Add semantic context and interpretations to financial data."""

    # Concept definitions for common financial terms
    CONCEPT_DEFINITIONS = {
        "Revenue": "Total income generated from normal business operations",
        "Revenues": "Total income generated from normal business operations",
        "NetIncome": "Company's total earnings after all expenses and taxes",
        "NetIncomeLoss": "Company's total earnings or losses after all expenses",
        "Assets": "Resources owned by the company with economic value",
        "Liabilities": "Company's financial debts or obligations",
        "StockholdersEquity": "Residual interest in assets after deducting liabilities",
        "CashAndCashEquivalents": "Highly liquid assets readily convertible to cash",
        "OperatingIncome": "Profit from core business operations before interest and taxes",
        "EarningsPerShare": "Company's profit divided by outstanding shares",
        "CurrentAssets": "Assets expected to be converted to cash within one year",
        "CurrentLiabilities": "Obligations due within one year",
    }

    # Relationships between concepts
    CONCEPT_RELATIONSHIPS = {
        "Revenue": ["GrossProfit", "OperatingIncome", "NetIncome"],
        "Assets": ["CurrentAssets", "NonCurrentAssets", "CashAndCashEquivalents"],
        "Liabilities": ["CurrentLiabilities", "LongTermDebt"],
        "NetIncome": ["Revenue", "OperatingExpenses", "TaxExpense"],
        "StockholdersEquity": ["Assets", "Liabilities", "RetainedEarnings"],
    }

    @classmethod
    def get_concept_definition(cls, concept: str) -> Optional[str]:
        """
        Get human-readable definition for a concept.

        Args:
            concept: Concept name, optionally namespace-prefixed
                (e.g. "us-gaap:Revenue").

        Returns:
            The definition, or None when the concept is not in the table.
        """
        # Only the part after the last ':' is used as the lookup key.
        concept_key = concept.split(':')[-1]
        return cls.CONCEPT_DEFINITIONS.get(concept_key)

    @classmethod
    def get_related_concepts(cls, concept: str) -> List[str]:
        """
        Get semantically related concepts.

        Args:
            concept: Concept name, optionally namespace-prefixed.

        Returns:
            The related concept names, or an empty list for unknown concepts.
        """
        concept_key = concept.split(':')[-1]
        return cls.CONCEPT_RELATIONSHIPS.get(concept_key, [])

    @classmethod
    def interpret_value(cls, concept: str, value: Union[int, float],
                        unit: str, period_type: Optional[str] = None) -> str:
        """
        Generate business interpretation of a financial value.

        Args:
            concept: The financial concept (e.g., "Revenue")
            value: The numeric value
            unit: The unit of measurement (e.g., "USD"); not used by the
                current rules
            period_type: 'instant' or 'duration'; not used by the current rules

        Returns:
            Human-readable interpretation, or "" when the concept has no
            rule or the value is missing / textual.
        """
        # The rules below compare against numeric thresholds; a missing or
        # textual value cannot be interpreted and would raise TypeError.
        if value is None or isinstance(value, str):
            return ""

        concept_key = concept.split(':')[-1]

        # Revenue interpretations
        if concept_key in ["Revenue", "Revenues"]:
            if value > 1_000_000_000:
                scale = "billion-dollar"
            elif value > 100_000_000:
                scale = "multi-million dollar"
            else:
                scale = "smaller-scale"
            return f"The company is a {scale} business based on revenue"

        # Profitability interpretations
        if concept_key in ["NetIncome", "NetIncomeLoss"]:
            if value > 0:
                return "The company is profitable"
            if value == 0:
                return "The company broke even"
            return "The company reported a net loss"

        # Asset interpretations
        if concept_key == "CashAndCashEquivalents":
            if value > 10_000_000_000:
                return "Very strong cash position providing significant financial flexibility"
            if value > 1_000_000_000:
                return "Healthy cash reserves for operations and investments"
            if value > 100_000_000:
                return "Adequate cash position for normal operations"
            return "Limited cash reserves may constrain growth opportunities"

        return ""
class AIEnabled(ABC):
    """
    Abstract mixin describing the AI-facing methods of EdgarTools classes.

    Subclasses must implement ``to_llm_context`` and
    ``get_semantic_description``; ``to_agent_tool`` is derived from them.
    """

    @abstractmethod
    def to_llm_context(self, detail_level: str = 'standard',
                       max_tokens: Optional[int] = None) -> Dict[str, Any]:
        """
        Convert the object to an LLM-oriented context dictionary.

        Args:
            detail_level: Level of detail ('minimal', 'standard', 'detailed')
            max_tokens: Optional token limit for response optimization

        Returns:
            Dictionary intended for LLM consumption
        """

    def to_agent_tool(self) -> Dict[str, Any]:
        """
        Bundle the object's data, LLM context and metadata into one dict.

        Returns:
            Dictionary with "data" (``to_dict()`` when the object defines it,
            otherwise empty), "context" (``to_llm_context()`` with default
            arguments) and "metadata" (source, class name, today's date).
        """
        payload = self.to_dict() if hasattr(self, 'to_dict') else {}
        llm_context = self.to_llm_context()
        metadata = {
            "source": "SEC EDGAR",
            "object_type": self.__class__.__name__,
            "timestamp": date.today().isoformat(),
        }
        return {"data": payload, "context": llm_context, "metadata": metadata}

    @abstractmethod
    def get_semantic_description(self) -> str:
        """
        Get a natural language description of the object.

        Returns:
            Human-readable description with key insights
        """
def enhance_financial_fact_llm_context(fact, detail_level='standard', max_tokens=None):
    """
    Enhanced version of FinancialFact.to_llm_context() with new features.

    Starts from ``fact.to_llm_context()`` and layers extra keys on top
    depending on ``detail_level``.

    Args:
        fact: FinancialFact instance (anything exposing the attributes read
            below and a ``to_llm_context()`` method)
        detail_level: 'minimal', 'standard', or 'detailed'
        max_tokens: Optional token limit; a falsy value disables trimming

    Returns:
        Enhanced LLM context dictionary
    """
    # Start with the existing implementation
    context = fact.to_llm_context()

    # Add semantic enrichment based on detail level
    if detail_level in ['standard', 'detailed']:
        definition = SemanticEnricher.get_concept_definition(fact.concept)
        if definition:
            context['definition'] = definition

        # Prefer the numeric value. An explicit None check is required:
        # `numeric_value or value` would throw away a legitimate 0 and fall
        # back to the display value.
        numeric = fact.numeric_value
        raw_value = numeric if numeric is not None else fact.value

        # Interpretation rules compare against numeric thresholds, so a
        # missing or textual value is skipped rather than passed through.
        if raw_value is not None and not isinstance(raw_value, str):
            interpretation = SemanticEnricher.interpret_value(
                fact.concept,
                raw_value,
                fact.unit,
                fact.period_type
            )
            if interpretation:
                context['interpretation'] = interpretation

    if detail_level == 'detailed':
        related = SemanticEnricher.get_related_concepts(fact.concept)
        if related:
            context['related_concepts'] = related

        # Add additional metadata
        context['metadata'] = {
            'taxonomy': fact.taxonomy,
            'scale': fact.scale,
            'decimals': getattr(fact, 'decimals', None),
            'statement_type': fact.statement_type
        }

        # Add calculation context if available
        if hasattr(fact, 'calculation_context') and fact.calculation_context:
            context['calculation_context'] = fact.calculation_context

    # Optimize for token limit if specified
    if max_tokens:
        context = TokenOptimizer.optimize_for_tokens(context, max_tokens)
    return context
class FinancialFactAIWrapper:
    """
    Adapter that gives an existing fact object the AI-facing methods.

    The wrapped object is kept as ``self.fact`` and is never modified.
    """

    def __init__(self, fact):
        self.fact = fact

    def to_llm_context(self, detail_level='standard', max_tokens=None):
        """Return the enhanced LLM context for the wrapped fact."""
        return enhance_financial_fact_llm_context(self.fact, detail_level, max_tokens)

    def to_agent_tool(self):
        """Build a dict with "data", "context" and "metadata" for the fact."""
        fact = self.fact

        data = {
            "concept": fact.concept,
            "value": fact.value,
            "numeric_value": fact.numeric_value,
            "unit": fact.unit,
        }
        # Dates are rendered as ISO strings; a falsy date becomes None.
        data["period_end"] = fact.period_end.isoformat() if fact.period_end else None
        data["fiscal_period"] = fact.fiscal_period
        data["fiscal_year"] = fact.fiscal_year

        llm_context = self.to_llm_context()

        metadata = {"source": f"SEC {fact.form_type}"}
        metadata["filed"] = fact.filing_date.isoformat() if fact.filing_date else None
        metadata["quality"] = fact.data_quality.value
        metadata["confidence"] = fact.confidence_score

        return {"data": data, "context": llm_context, "metadata": metadata}

    def get_semantic_description(self):
        """Return a one-line sentence assembled from the fact's base context."""
        ctx = self.fact.to_llm_context()
        concept, value, unit = ctx['concept'], ctx['value'], ctx['unit']
        period, source = ctx['period'], ctx['source']
        return f"{concept} of {value} {unit} {period} from {source}"
def check_ai_capabilities():
    """
    Report which AI features can be used with the installed packages.

    Returns:
        Dictionary of capability flags. 'basic' and 'semantic_enrichment'
        are always True; 'mcp' and 'token_optimization' are True only when
        the ``mcp`` / ``tiktoken`` package can be imported.
    """
    flags = {
        'basic': True,  # Always available
        'mcp': False,
        'token_optimization': False,
        'semantic_enrichment': True,  # Works without external deps
    }
    # (module to import, capability it unlocks)
    optional_modules = (("mcp", 'mcp'), ("tiktoken", 'token_optimization'))
    for module_name, capability in optional_modules:
        try:
            __import__(module_name)
        except ImportError:
            continue
        flags[capability] = True
    return flags
# Example usage demonstrating the enhanced capabilities
# Runs only when this module is executed directly; nothing here is defined on import.
if __name__ == "__main__":
    # This would be imported from edgar.entity.models
    from dataclasses import dataclass
    from enum import Enum

    # Minimal stand-in enum so the mock fact has a `.value`-bearing quality field.
    class DataQuality(Enum):
        HIGH = "high"

    @dataclass
    class MockFinancialFact:
        """Mock class for demonstration"""
        # Field names mirror the attributes that the enhancement function and
        # the wrapper class read from a fact.
        concept: str = "us-gaap:Revenue"
        taxonomy: str = "us-gaap"
        value: float = 125_000_000_000
        numeric_value: float = 125_000_000_000
        unit: str = "USD"
        scale: int = 1
        period_end: date = date(2024, 3, 31)
        period_type: str = "duration"
        fiscal_period: str = "Q1"
        fiscal_year: int = 2024
        form_type: str = "10-Q"
        filing_date: date = date(2024, 4, 30)
        data_quality: DataQuality = DataQuality.HIGH
        confidence_score: float = 0.95
        statement_type: str = "IncomeStatement"

        def to_llm_context(self):
            # Simulate existing implementation
            # Returns a fixed dict; it does not read the dataclass fields.
            return {
                "concept": "Revenue",
                "value": "125,000 million",
                "unit": "USD",
                "period": "for Q1 2024",
                "context": "",
                "quality": "high",
                "confidence": 0.95,
                "source": "10-Q filed 2024-04-30"
            }

    # Create a mock fact
    fact = MockFinancialFact()
    # Wrap it with AI enhancements
    ai_fact = FinancialFactAIWrapper(fact)
    # Test different detail levels
    # NOTE(review): no statements follow this comment in the visible source;
    # the demo output code appears to be missing - confirm against the repo.

View File

@@ -0,0 +1,5 @@
"""
EdgarTools AI examples.
This package contains example scripts demonstrating AI capabilities.
"""

View File

@@ -0,0 +1,187 @@
#!/usr/bin/env python3
"""
Basic usage examples for EdgarTools AI features.
This script demonstrates how to use the AI capabilities including
LLM context generation and MCP server functionality.
"""
import json
from datetime import date
# Check if AI features are available
# The example cannot run without the edgar.ai package, so a failed import
# prints an installation hint and terminates with exit status 1.
try:
    from edgar.ai import (
        AI_AVAILABLE,
        MCP_AVAILABLE,
        get_ai_info,
        enhance_financial_fact_llm_context,
        check_ai_capabilities
    )
except ImportError:
    print("EdgarTools AI features not available.")
    # The extra is named "ai", matching the hint edgar.ai itself reports.
    print("Install with: pip install edgartools[ai]")
    # SystemExit works even where the site-provided exit() helper is absent.
    raise SystemExit(1)
def demonstrate_ai_capabilities():
    """
    Print the AI availability summary and the per-capability flags.

    Reads the summary from ``get_ai_info()`` and the detailed flags from
    ``check_ai_capabilities()``; prints only, returns None.
    """
    print("=== AI Capabilities ===")
    info = get_ai_info()
    print(f"AI Available: {info['ai_available']}")
    print(f"MCP Available: {info['mcp_available']}")
    print(f"Token Optimization: {info['tiktoken_available']}")

    if info['missing_dependencies']:
        print(f"\nMissing dependencies: {', '.join(info['missing_dependencies'])}")
        print(f"Install with: {info['install_command']}")

    print("\nDetailed capabilities:")
    capabilities = check_ai_capabilities()
    for capability, available in capabilities.items():
        # Distinct markers: with empty strings on both branches the listing
        # could not show which capabilities are actually available.
        status = "✅" if available else "❌"
        print(f" {status} {capability}")
def demonstrate_financial_fact_enhancement():
    """
    Print the enhanced LLM context of a mock fact at three settings.

    Builds a local mock fact, then prints the JSON produced by
    ``enhance_financial_fact_llm_context`` for the minimal level, the
    standard level, and the detailed level capped at 100 tokens.
    """
    print("\n=== Financial Fact Enhancement ===")

    # Create a mock financial fact (in real usage, this would come from EdgarTools)
    from dataclasses import dataclass
    from enum import Enum

    class DataQuality(Enum):
        HIGH = "high"

    @dataclass
    class MockFinancialFact:
        concept: str = "us-gaap:Revenue"
        taxonomy: str = "us-gaap"
        label: str = "Revenue"
        value: float = 125_000_000_000
        numeric_value: float = 125_000_000_000
        unit: str = "USD"
        scale: int = 1
        period_end: date = date(2024, 3, 31)
        period_type: str = "duration"
        fiscal_period: str = "Q1"
        fiscal_year: int = 2024
        filing_date: date = date(2024, 4, 30)
        form_type: str = "10-Q"
        data_quality: DataQuality = DataQuality.HIGH
        confidence_score: float = 0.95
        statement_type: str = "IncomeStatement"

        def to_llm_context(self):
            """Basic LLM context built from this mock's own fields."""
            return {
                "concept": self.label,
                "value": f"{self.value:,.0f}",
                "unit": self.unit,
                "period": f"for {self.fiscal_period} {self.fiscal_year}",
                "quality": self.data_quality.value,
                "confidence": self.confidence_score,
                "source": f"{self.form_type} filed {self.filing_date}"
            }

    fact = MockFinancialFact()

    # (heading, keyword arguments for the enhancement call), shown in order.
    scenarios = (
        ("\nMinimal context:", {"detail_level": 'minimal'}),
        ("\nStandard context (with semantic enrichment):", {"detail_level": 'standard'}),
        ("\nToken-limited context (100 tokens):", {"detail_level": 'detailed', "max_tokens": 100}),
    )
    for heading, options in scenarios:
        print(heading)
        enriched = enhance_financial_fact_llm_context(fact, **options)
        print(json.dumps(enriched, indent=2))
def demonstrate_mcp_server():
    """
    Print how to set up the MCP server, if MCP support is installed.

    Returns early with an installation hint when ``MCP_AVAILABLE`` is False.
    Otherwise obtains a server from ``edgar.ai.mcp.get_simple_server`` and
    prints its name plus run/configuration instructions. An ImportError
    raised while doing so is reported rather than propagated.
    """
    print("\n=== MCP Server Setup ===")
    if not MCP_AVAILABLE:
        # The extra is named "ai", matching the hint edgar.ai itself reports.
        print("MCP not available. Install with: pip install edgartools[ai]")
        return

    try:
        from edgar.ai.mcp import get_simple_server
        server = get_simple_server()
        print("MCP Server created successfully!")
        print(f"Server name: {server.name}")
        print("\nTo run the server:")
        print(" python edgar/ai/run_mcp_server.py")
        print("\nOr use in Claude Desktop config:")
        print(""" {
"tools": [
{
"type": "mcp",
"name": "edgartools",
"config": {
"command": "python",
"args": ["edgar/ai/run_mcp_server.py"]
}
}
]
}""")
    except ImportError as e:
        print(f"Error creating MCP server: {e}")
def demonstrate_usage_with_company():
    """
    Show the AI features against a real ``edgar.Company`` object.

    Looks up "AAPL", prints its name and ticker, and prints its LLM context
    when the object offers ``to_llm_context``. Any exception raised along
    the way is caught and reported; the function only prints.
    """
    print("\n=== Usage with EdgarTools Company ===")
    try:
        from edgar import Company

        # Get a company
        aapl = Company("AAPL")
        print(f"Company: {aapl.name} ({aapl.get_ticker()})")

        # to_llm_context may not exist on Company yet (future enhancement)
        if not hasattr(aapl, 'to_llm_context'):
            print("\nNote: Company.to_llm_context() will be available in future versions")
            print("For now, use the AI wrapper functions to enhance EdgarTools objects")
        else:
            llm_context = aapl.to_llm_context()
            print("\nLLM Context:")
            print(json.dumps(llm_context, indent=2))
    except Exception as e:
        print(f"Error demonstrating company usage: {e}")
        print("This example requires a working internet connection and valid SEC API access")
def main():
    """Run every demonstration in order, framed by a header and a footer."""
    divider = "=" * 50
    print("EdgarTools AI Features Demonstration")
    print(divider)

    demos = (
        demonstrate_ai_capabilities,             # capability check
        demonstrate_financial_fact_enhancement,  # fact enhancement
        demonstrate_mcp_server,                  # MCP server setup
        demonstrate_usage_with_company,          # real EdgarTools objects
    )
    for demo in demos:
        demo()

    print("\n" + divider)
    print("For more examples, see the documentation in edgar/ai/docs/")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,53 @@
"""
EdgarTools AI skill exporters.
Provides functions to export skills in various formats for AI tool integration.
"""
from edgar.ai.exporters.claude_desktop import export_claude_desktop
from edgar.ai.exporters.claude_skills import export_claude_skills
__all__ = ['export_claude_desktop', 'export_claude_skills', 'export_skill']
def export_skill(skill, format: str = "claude-skills", output_dir=None, **kwargs):
    """
    Export a skill in the specified format.

    Dispatches to ``export_claude_skills`` or ``export_claude_desktop`` and
    returns that exporter's result.

    Args:
        skill: BaseSkill instance to export
        format: Export format:
            - "claude-skills": Official Claude Skills format (default, ~/.claude/skills/)
            - "claude-desktop": Portable format (current directory)
        output_dir: Optional output directory (format-specific defaults)
        **kwargs: Additional format-specific parameters, forwarded unchanged:
            - claude-skills: install (bool, default True)
            - claude-desktop: create_zip (bool, default True)

    Returns:
        Path: Path to exported skill directory or archive

    Raises:
        ValueError: If ``format`` is not one of the supported names.

    Examples:
        >>> from edgar.ai.skills import edgartools_skill
        >>> # Export to ~/.claude/skills/ (default)
        >>> export_skill(edgartools_skill, format="claude-skills")
        PosixPath('/Users/username/.claude/skills/edgartools')
        >>> # Export as zip archive (the claude-desktop default)
        >>> export_skill(edgartools_skill, format="claude-desktop")
        PosixPath('edgartools.zip')
        >>> # Export as a plain directory instead
        >>> export_skill(edgartools_skill, format="claude-desktop", create_zip=False)
        PosixPath('edgartools')
    """
    # Reject unknown formats first so no exporter is touched on bad input.
    if format not in ("claude-skills", "claude-desktop"):
        raise ValueError(
            f"Unknown export format: {format}. "
            f"Supported formats: 'claude-skills', 'claude-desktop'"
        )

    if format == "claude-skills":
        return export_claude_skills(skill, output_dir=output_dir, **kwargs)
    return export_claude_desktop(skill, output_dir=output_dir, **kwargs)

View File

@@ -0,0 +1,173 @@
"""
Claude Desktop skill exporter.
Exports EdgarTools skills for Claude Desktop upload:
- Creates ZIP file with SKILL.md at root (required by Claude Desktop)
- Validates YAML frontmatter structure
- Includes all supporting markdown files and API reference
"""
import shutil
import zipfile
from pathlib import Path
from typing import Optional
import re
def export_claude_desktop(skill, output_dir: Optional[Path] = None, create_zip: bool = True) -> Path:
    """
    Export a skill for Claude Desktop upload.

    Collects the skill's top-level markdown files (validating them on the way)
    plus its object documentation under ``api-reference/``, and either zips
    the result or leaves it as a directory.

    In ZIP mode the files are staged in a temporary directory, so an existing
    ``<output_dir>/<skill-name>`` directory is never touched. In directory
    mode an existing ``<output_dir>/<skill-name>`` is replaced.

    Args:
        skill: BaseSkill instance to export
        output_dir: Optional output directory (defaults to current directory);
            a leading ``~`` is expanded to the user's home directory
        create_zip: If True (default), create a zip archive; if False, create directory

    Returns:
        Path: Path to exported ZIP file (or directory if create_zip=False)

    Raises:
        TypeError: If ``skill`` is not a BaseSkill instance.
        ValueError: If the skill's content directory has no markdown files,
            or a markdown file fails frontmatter validation.

    Examples:
        >>> from edgar.ai.skills import edgartools_skill
        >>> # Create ZIP for Claude Desktop upload (default)
        >>> export_claude_desktop(edgartools_skill)
        PosixPath('edgartools.zip')
        >>> # Create directory for manual installation
        >>> export_claude_desktop(edgartools_skill, create_zip=False)
        PosixPath('edgartools')
    """
    import tempfile  # only needed for ZIP staging

    from edgar.ai.skills.base import BaseSkill

    if not isinstance(skill, BaseSkill):
        raise TypeError(f"Expected BaseSkill instance, got {type(skill)}")

    # Determine output directory; expanduser makes "~/..." behave as documented
    # by the convenience wrappers instead of creating a literal "~" folder.
    output_dir = Path.cwd() if output_dir is None else Path(output_dir).expanduser()

    # Create skill-specific directory name (kebab-case from skill name)
    skill_dir_name = skill.name.lower().replace(' ', '-')

    # Check the inputs before anything on disk is created or removed.
    content_dir = skill.content_dir
    markdown_files = list(content_dir.glob("*.md"))
    if not markdown_files:
        raise ValueError(f"No markdown files found in {content_dir}")

    if create_zip:
        output_dir.mkdir(parents=True, exist_ok=True)
        zip_path = output_dir / f"{skill_dir_name}.zip"
        with tempfile.TemporaryDirectory() as staging_root:
            # The staging folder carries the skill name because the archive
            # helper stores entries relative to the folder's parent.
            staged_dir = Path(staging_root) / skill_dir_name
            staged_dir.mkdir()
            _write_skill_files(skill, markdown_files, staged_dir)
            _create_zip_archive(staged_dir, zip_path)
        return zip_path

    skill_output_dir = output_dir / skill_dir_name
    # Directory mode replaces a previous export of the same skill.
    if skill_output_dir.exists():
        shutil.rmtree(skill_output_dir)
    skill_output_dir.mkdir(parents=True, exist_ok=True)
    _write_skill_files(skill, markdown_files, skill_output_dir)
    return skill_output_dir


def _write_skill_files(skill, markdown_files, target_dir: Path) -> None:
    """Copy the validated markdown files and the API reference docs into target_dir."""
    for md_file in markdown_files:
        _copy_and_validate_markdown(md_file, target_dir)

    # Copy centralized object documentation (API reference)
    object_docs = skill.get_object_docs()
    if object_docs:
        api_ref_dir = target_dir / "api-reference"
        api_ref_dir.mkdir(exist_ok=True)
        for doc_path in object_docs:
            # Silently skip missing docs (allows for optional docs)
            if doc_path.exists():
                shutil.copy2(doc_path, api_ref_dir / doc_path.name)
def _copy_and_validate_markdown(source: Path, destination_dir: Path) -> None:
"""
Copy markdown file and validate YAML frontmatter.
Args:
source: Source markdown file path
destination_dir: Destination directory
Raises:
ValueError: If YAML frontmatter is invalid or missing in SKILL.md
"""
dest_file = destination_dir / source.name
# Read and validate
content = source.read_text(encoding='utf-8')
# Only require frontmatter for SKILL.md
if source.name == 'SKILL.md':
# Check for YAML frontmatter
if not content.startswith('---'):
raise ValueError(f"Missing YAML frontmatter in {source.name}")
# Extract frontmatter
parts = content.split('---', 2)
if len(parts) < 3:
raise ValueError(f"Invalid YAML frontmatter structure in {source.name}")
frontmatter = parts[1].strip()
# Validate required frontmatter fields
_validate_skill_frontmatter(frontmatter, source.name)
else:
# Optional: validate frontmatter if present in supporting files
if content.startswith('---'):
parts = content.split('---', 2)
if len(parts) < 3:
raise ValueError(f"Invalid YAML frontmatter structure in {source.name}")
# Copy file
shutil.copy2(source, dest_file)
def _validate_skill_frontmatter(frontmatter: str, filename: str) -> None:
"""
Validate required fields in skill.md frontmatter.
Args:
frontmatter: YAML frontmatter content
filename: Source filename (for error messages)
Raises:
ValueError: If required fields are missing
"""
# Only require essential fields (name and description)
# version and author are optional
required_fields = ['name', 'description']
for field in required_fields:
# Simple regex check (not full YAML parsing to avoid dependencies)
if not re.search(rf'^{field}:', frontmatter, re.MULTILINE):
raise ValueError(f"Missing required field '{field}' in {filename} frontmatter")
def _create_zip_archive(source_dir: Path, zip_path: Path) -> None:
"""
Create a zip archive of the skill directory.
Args:
source_dir: Source directory to zip
zip_path: Output zip file path
"""
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
for file_path in source_dir.rglob('*'):
if file_path.is_file():
arcname = file_path.relative_to(source_dir.parent)
zipf.write(file_path, arcname)

View File

@@ -0,0 +1,163 @@
"""
Claude Skills exporter.
Exports EdgarTools skills in official Anthropic Claude Skills format:
- Installs to ~/.claude/skills/ by default
- Main file: SKILL.md (uppercase, per Anthropic spec)
- Keeps all supporting markdown files
- Validates YAML frontmatter structure
"""
import shutil
from pathlib import Path
from typing import Optional
import re
def export_claude_skills(skill, output_dir: Optional[Path] = None, install: bool = True) -> Path:
    """
    Export a skill in official Claude Skills format.

    Copies the skill's top-level markdown files (SKILL.md is validated) and
    its object documentation (under ``api-reference/``) into
    ``<output_dir>/<skill-name>``, replacing a previous export of that skill.
    The content is checked for markdown files and a SKILL.md before an
    existing export is removed.

    Args:
        skill: BaseSkill instance to export
        output_dir: Optional output directory; a leading ``~`` is expanded.
            When given, it is used regardless of ``install``.
        install: Only consulted when ``output_dir`` is None: if True (default)
            export to ~/.claude/skills/, otherwise to the current directory

    Returns:
        Path: Path to exported skill directory

    Raises:
        TypeError: If ``skill`` is not a BaseSkill instance.
        ValueError: If the content directory has no markdown files or no
            SKILL.md, or SKILL.md fails frontmatter validation.

    Examples:
        >>> from edgar.ai.skills import edgartools_skill
        >>> export_claude_skills(edgartools_skill)
        PosixPath('/Users/username/.claude/skills/edgartools')
        >>> # Export to custom location
        >>> export_claude_skills(edgartools_skill,
        ...                      output_dir="./my-skills",
        ...                      install=False)
        PosixPath('my-skills/edgartools')
    """
    from edgar.ai.skills.base import BaseSkill

    if not isinstance(skill, BaseSkill):
        raise TypeError(f"Expected BaseSkill instance, got {type(skill)}")

    # Determine output directory
    if output_dir is not None:
        # expanduser makes "~/..." resolve to the home directory instead of
        # creating a literal "~" folder under the current directory.
        output_dir = Path(output_dir).expanduser()
    elif install:
        # Default: Install to ~/.claude/skills/
        output_dir = Path.home() / ".claude" / "skills"
    else:
        # No install flag, no output_dir: use current directory
        output_dir = Path.cwd()

    # Create skill-specific directory name (kebab-case from skill name)
    skill_dir_name = skill.name.lower().replace(' ', '-')
    skill_output_dir = output_dir / skill_dir_name

    # Check the content before an existing export is removed, so bad input
    # does not wipe a working installation.
    content_dir = skill.content_dir
    markdown_files = list(content_dir.glob("*.md"))
    if not markdown_files:
        raise ValueError(f"No markdown files found in {content_dir}")
    if not any(md_file.name == 'SKILL.md' for md_file in markdown_files):
        raise ValueError("No SKILL.md found in skill content directory")

    # Remove existing directory if present
    if skill_output_dir.exists():
        shutil.rmtree(skill_output_dir)
    skill_output_dir.mkdir(parents=True, exist_ok=True)

    for md_file in markdown_files:
        if md_file.name == 'SKILL.md':
            # Validate and copy SKILL.md
            _copy_and_validate_skill_md(md_file, skill_output_dir)
        else:
            # Copy supporting markdown files as-is
            shutil.copy2(md_file, skill_output_dir / md_file.name)

    # Copy centralized object documentation (API reference)
    object_docs = skill.get_object_docs()
    if object_docs:
        api_ref_dir = skill_output_dir / "api-reference"
        api_ref_dir.mkdir(exist_ok=True)
        for doc_path in object_docs:
            # Silently skip missing docs (allows for optional docs)
            if doc_path.exists():
                shutil.copy2(doc_path, api_ref_dir / doc_path.name)

    return skill_output_dir
def _copy_and_validate_skill_md(source: Path, destination_dir: Path) -> None:
    """
    Validate the YAML frontmatter of a SKILL.md file, then write it out.

    The file is read as UTF-8 text. It must begin with a ``---`` delimiter
    and contain a closing ``---``; the text between the two delimiters is
    handed to ``_validate_skill_frontmatter``. The destination file is only
    written after every check has passed.

    Args:
        source: Path of the SKILL.md file to read.
        destination_dir: Directory that receives a file with the same name
            as ``source``.

    Raises:
        ValueError: If the frontmatter is missing, is not terminated by a
            second ``---``, or is rejected by ``_validate_skill_frontmatter``.
    """
    text = source.read_text(encoding='utf-8')

    # The very first characters must open the frontmatter block.
    if not text.startswith('---'):
        raise ValueError(f"Missing YAML frontmatter in {source.name}")

    # Expected pieces: '' (before the opener), the frontmatter, the body.
    segments = text.split('---', 2)
    if len(segments) < 3:
        raise ValueError(f"Invalid YAML frontmatter structure in {source.name}")

    _validate_skill_frontmatter(segments[1].strip(), source.name)

    # Validation passed: write the unmodified text under the same file name.
    (destination_dir / source.name).write_text(text, encoding='utf-8')
def _validate_skill_frontmatter(frontmatter: str, filename: str) -> None:
    """
    Check that SKILL.md frontmatter declares the required fields.

    A field counts as present when some line of ``frontmatter`` starts with
    ``<field>:`` at column 0. This is a plain textual check, not a YAML
    parse, so indented keys are not recognised and values are not inspected.

    Required fields:
        - name
        - description

    Args:
        frontmatter: Text found between the ``---`` delimiters.
        filename: Name of the source file, used only in the error message.

    Raises:
        ValueError: For the first required field that is not found.
    """
    # Splitting on '\n' gives exactly the positions where a line can start.
    lines = frontmatter.split('\n')

    for field in ('name', 'description'):
        marker = f"{field}:"
        if any(line.startswith(marker) for line in lines):
            continue
        raise ValueError(
            f"Missing required field '{field}' in {filename} frontmatter. "
            f"Claude Skills require both 'name' and 'description' fields."
        )

View File

@@ -0,0 +1,101 @@
"""
AI-optimized text formatting utilities for EdgarTools.
Provides research-backed text formats optimized for LLM accuracy and token efficiency:
- Markdown-KV: Best accuracy (60.7%) for metadata
- TSV: Most efficient for tabular data
Based on research from improvingagents.com/blog/best-input-data-format-for-llms
"""
from typing import List, Dict
# Public API of this module: the two text formatters defined below.
__all__ = ['to_markdown_kv', 'to_tsv']
def to_markdown_kv(data: dict, max_tokens: int = 2000) -> str:
    """
    Convert a dict to Markdown key-value text, one ``**Key:** value`` per entry.

    Keys are converted with ``str()``, underscores become spaces and the
    result is title-cased. Entries whose value is ``None`` are skipped.

    Token limiting uses a 4-characters-per-token heuristic. When the text is
    too long, whole entries are kept from the start for as long as they fit
    and a truncation marker is appended. Entries are never cut in half, so a
    number is never shown with digits missing. Only when not even the first
    entry fits is the text cut at the character limit, so that some content
    is still returned.

    Args:
        data: Mapping of field names to simple values.
        max_tokens: Approximate token limit (4 chars/token heuristic).

    Returns:
        Markdown-formatted key-value text, followed by
        ``"\\n\\n[Truncated for token limit]"`` if anything was dropped.

    Example:
        >>> to_markdown_kv({"name": "Apple Inc.", "cik": "320193"})
        '**Name:** Apple Inc.\\n**Cik:** 320193'
    """
    entries = [
        f"**{str(key).replace('_', ' ').title()}:** {value}"
        for key, value in data.items()
        if value is not None
    ]
    text = "\n".join(entries)

    max_chars = max_tokens * 4
    if len(text) <= max_chars:
        return text

    # Keep complete entries only; a partially shown value would be misleading.
    kept = []
    used = 0
    for entry in entries:
        cost = len(entry) + (1 if kept else 0)  # +1 for the joining newline
        if used + cost > max_chars:
            break
        kept.append(entry)
        used += cost

    if kept:
        body = "\n".join(kept)
    else:
        # Not even the first entry fits: fall back to a plain character cut.
        body = text[:max(max_chars, 0)]
    return body + "\n\n[Truncated for token limit]"
# Characters that would break the one-row-per-line, tab-separated layout if
# they appeared inside a cell; each is replaced by a single space.
_TSV_CELL_CLEANUP = str.maketrans({"\t": " ", "\n": " ", "\r": " "})


def to_tsv(rows: List[Dict], headers: List[str], max_tokens: int = 2000, limit: int = 10) -> str:
    """
    Convert a list of dicts to TSV (tab-separated values) text.

    The first line holds ``headers``; each following line holds one row, with
    the values looked up by header name. A key missing from a row is rendered
    as ``N/A``. Values are converted with ``str()``, and any tab, newline or
    carriage return inside a value is replaced by a space so that every row
    stays on one line with the expected number of columns.

    Args:
        rows: List of dicts to render.
        headers: Keys to extract from each row, in column order.
        max_tokens: Approximate token limit (4 chars/token heuristic).
        limit: Maximum rows to include (default: 10).

    Returns:
        Tab-separated text with a header row. If ``rows`` holds more than
        ``limit`` items, ``"[Showing {limit} of {total} rows]"`` is appended.
        If the text exceeds the character budget, the header plus as many
        complete rows as fit are returned, followed by
        ``"[Truncated for token limit]"`` instead.

    Example:
        >>> rows = [{"form": "10-K", "cik": "320193"}, {"form": "10-Q", "cik": "789019"}]
        >>> to_tsv(rows, ["form", "cik"], limit=2)
        'form\\tcik\\n10-K\\t320193\\n10-Q\\t789019'
    """
    lines = ["\t".join(headers)]
    for row in rows[:limit]:
        cells = (str(row.get(h, "N/A")).translate(_TSV_CELL_CLEANUP) for h in headers)
        lines.append("\t".join(cells))

    text = "\n".join(lines)

    # Tell the reader when the row limit hid part of the data.
    if len(rows) > limit:
        text += f"\n\n[Showing {limit} of {len(rows)} rows]"

    max_chars = max_tokens * 4
    if len(text) <= max_chars:
        return text

    # Over budget: always keep the header, then add rows using their real
    # lengths. (An average-size estimate can overshoot when row sizes vary
    # and divides by zero when the average rounds down to 0.)
    kept = lines[:1]
    used = len(lines[0])
    for line in lines[1:]:
        cost = len(line) + 1  # +1 for the joining newline
        if used + cost > max_chars:
            break
        kept.append(line)
        used += cost
    return "\n".join(kept) + "\n\n[Truncated for token limit]"

View File

@@ -0,0 +1,667 @@
"""
Helper functions for common SEC filing analysis tasks.
These convenience wrappers provide simple, high-level access to EdgarTools functionality
for common SEC filing analysis patterns.
"""
from typing import Optional, List, Dict, Union
import pandas as pd
from edgar import get_filings, get_current_filings, Company
# Public API of this module, grouped by purpose. Every name listed here is
# defined as a function further down in this file.
__all__ = [
    # Filing retrieval
    'get_filings_by_period',
    'get_today_filings',
    # Financial analysis
    'get_revenue_trend',
    'get_filing_statement',
    'compare_companies_revenue',
    # Industry and company subset filtering
    'filter_by_industry',
    'filter_by_company_subset',
    # Company subset convenience functions
    'get_companies_by_state',
    'get_pharmaceutical_companies',
    'get_biotechnology_companies',
    'get_software_companies',
    'get_semiconductor_companies',
    'get_banking_companies',
    'get_investment_companies',
    'get_insurance_companies',
    'get_real_estate_companies',
    'get_oil_gas_companies',
    'get_retail_companies',
]
def get_filings_by_period(
    year: int,
    quarter: int,
    form: Optional[str] = None,
    filing_date: Optional[str] = None
):
    """
    Get published filings for a specific time period from SEC quarterly indexes.

    This is a convenience wrapper around get_filings() with clear parameter names.

    Note:
        This function performs no validation of its own. ``year`` and
        ``quarter`` are passed positionally and ``form`` / ``filing_date`` by
        keyword to get_filings(), whose result is returned unchanged. Any
        exception listed below therefore originates in get_filings().

    Args:
        year: Year (e.g., 2023)
        quarter: Quarter 1-4 (1=Jan-Mar, 2=Apr-Jun, 3=Jul-Sep, 4=Oct-Dec)
        form: Optional form type filter (e.g., "10-K", "10-Q", "S-1")
        filing_date: Optional date or range filter (e.g., "2023-02-01:2023-02-28")

    Returns:
        Filings collection that can be further filtered or iterated

    Raises:
        HTTPError: If SEC API request fails
        ValueError: If year/quarter parameters are invalid

    Examples:
        >>> # Get all filings from Q1 2023
        >>> filings = get_filings_by_period(2023, 1)

        >>> # Get only 10-K filings from Q1 2023
        >>> filings = get_filings_by_period(2023, 1, form="10-K")

        >>> # Get S-1 filings from February 2023
        >>> filings = get_filings_by_period(
        ...     2023, 1,
        ...     form="S-1",
        ...     filing_date="2023-02-01:2023-02-28"
        ... )

    See Also:
        - get_filings() - The underlying raw API function
        - get_today_filings() - For real-time filings (last 24h)
        - Company.get_filings() - For company-specific filings
    """
    return get_filings(year, quarter, form=form, filing_date=filing_date)
def get_today_filings():
    """
    Get current filings from the last ~24 hours using SEC RSS feed.

    This is a convenience wrapper around get_current_filings() for simpler naming.
    It takes no arguments, calls get_current_filings() with its defaults and
    returns the result unchanged.

    Returns:
        CurrentFilings collection with recent submissions

    Raises:
        HTTPError: If SEC RSS feed request fails

    Examples:
        >>> # Get all recent filings
        >>> current = get_today_filings()
        >>> print(f"Found {len(current)} filings in last 24 hours")

        >>> # Filter for specific forms
        >>> reports = current.filter(form=["10-K", "10-Q"])

        >>> # Filter for specific companies
        >>> tech_filings = current.filter(ticker=["AAPL", "MSFT", "GOOGL"])

    See Also:
        - get_current_filings() - The underlying raw API function
        - get_filings_by_period() - For historical filings by quarter
    """
    return get_current_filings()
def get_revenue_trend(
    ticker: str,
    periods: int = 3,
    quarterly: bool = False
):
    """
    Get income statement trend for revenue analysis using Entity Facts API.

    This is the most efficient way to get multi-period financial data as it
    uses a single API call to retrieve comparative periods.

    Implementation: builds ``Company(ticker)`` and returns the result of its
    ``income_statement()`` method unchanged.

    Args:
        ticker: Company ticker symbol (e.g., "AAPL", "MSFT", "GOOGL")
        periods: Number of periods to retrieve (default: 3)
            - For annual: Gets last N fiscal years
            - For quarterly: Gets last N quarters
        quarterly: If True, get quarterly data; if False, get annual data
            (default: False for annual)

    Returns:
        MultiPeriodStatement object containing income statement data across
        multiple periods. Can be printed directly or accessed programmatically
        via .periods attribute.

    Raises:
        ValueError: If ticker is invalid or company not found
        HTTPError: If SEC Company Facts API request fails
        NoCompanyFactsFound: If company has no financial data

    Examples:
        >>> # Get 3 fiscal years of revenue data (default)
        >>> income = get_revenue_trend("AAPL")
        >>> print(income)  # Shows 3-year revenue trend

        >>> # Get 4 quarters of revenue data
        >>> quarterly = get_revenue_trend("TSLA", periods=4, quarterly=True)
        >>> print(quarterly)  # Shows 4-quarter trend

        >>> # Get 5 years for long-term analysis
        >>> long_term = get_revenue_trend("MSFT", periods=5)

        >>> # Access specific period programmatically
        >>> income = get_revenue_trend("AAPL", periods=3)
        >>> fy2023_data = income.periods[0]  # Most recent period

    See Also:
        - Company.income_statement() - The underlying raw API method
        - get_filing_statement() - For statement from specific filing
        - compare_companies_revenue() - For multi-company comparison
    """
    company = Company(ticker)
    # income_statement() takes an `annual` flag, so `quarterly` is inverted here.
    return company.income_statement(periods=periods, annual=not quarterly)
def get_filing_statement(
    ticker: str,
    year: int,
    form: str,
    statement_type: str = "income"
):
    """
    Get a specific financial statement from a company's filing using XBRL.

    The first filing returned by ``Company(ticker).get_filings(year=year,
    form=form)`` is parsed with ``filing.xbrl()`` and the requested statement
    is read from ``xbrl.statements``. For multi-period comparison, consider
    using get_revenue_trend() instead.

    ``statement_type`` is validated before any company or filing lookup, so
    a typo is reported immediately instead of after the filing has been
    fetched and its XBRL parsed.

    Args:
        ticker: Company ticker symbol (e.g., "AAPL", "MSFT")
        year: Filing year (e.g., 2023)
        form: Form type (e.g., "10-K" for annual, "10-Q" for quarterly)
        statement_type: Type of statement to retrieve (default: "income")
            - "income" - Income statement
            - "balance" - Balance sheet
            - "cash_flow" - Cash flow statement

    Returns:
        Statement object with detailed line items from the filing.
        Can be printed directly or accessed programmatically.

    Raises:
        ValueError: If statement_type is not one of the three names above
        IndexError: If no filing is found for the specified year/form
            (presumably raised by the ``[0]`` lookup - confirm against
            Company.get_filings())
        Other errors raised by Company, get_filings() or xbrl() propagate
        unchanged.

    Examples:
        >>> # Get income statement from Apple's 2023 10-K
        >>> income = get_filing_statement("AAPL", 2023, "10-K", "income")
        >>> print(income)

        >>> # Get balance sheet from quarterly filing
        >>> balance = get_filing_statement("AAPL", 2023, "10-Q", "balance")

        >>> # Get cash flow statement
        >>> cash_flow = get_filing_statement("MSFT", 2023, "10-K", "cash_flow")

    See Also:
        - Filing.xbrl() - The underlying XBRL parsing method
        - get_revenue_trend() - More efficient for multi-period data
        - Company.get_filings() - For accessing filings directly
    """
    # Fail fast: reject an unknown statement type before doing any lookups.
    if statement_type not in ("income", "balance", "cash_flow"):
        raise ValueError(
            f"Unknown statement type: {statement_type}. "
            f"Must be 'income', 'balance', or 'cash_flow'"
        )

    company = Company(ticker)
    filing = company.get_filings(year=year, form=form)[0]
    statements = filing.xbrl().statements

    if statement_type == "income":
        return statements.income_statement()
    if statement_type == "balance":
        return statements.balance_sheet()
    return statements.cash_flow_statement()
def compare_companies_revenue(
    tickers: Union[List[str], tuple],
    periods: int = 3
) -> Dict[str, 'MultiPeriodStatement']:
    """
    Collect the multi-period income statement of several companies.

    For each ticker, in the order given, a ``Company`` is created and its
    ``income_statement(periods=periods)`` result is stored under that ticker.
    Nothing is caught here: the first failing ticker aborts the whole call.
    A ticker listed twice is looked up twice and keeps the later result.

    Args:
        tickers: List or tuple of ticker symbols (e.g., ["AAPL", "MSFT", "GOOGL"])
        periods: Number of periods requested for every company (default: 3)

    Returns:
        Dictionary mapping each ticker to the statement returned for it.
        An empty ``tickers`` gives an empty dictionary.

    Raises:
        Whatever ``Company(...)`` or ``income_statement(...)`` raises; the
        original notes mention ValueError for an invalid ticker and HTTPError
        for a failed SEC request.

    Examples:
        >>> results = compare_companies_revenue(["AAPL", "MSFT", "GOOGL"], periods=3)
        >>> print(results["AAPL"])

        >>> # Iterate through all results
        >>> for ticker, statement in results.items():
        ...     print(f"{ticker} Revenue Trend:")
        ...     print(statement)

        >>> # To tolerate bad tickers, loop yourself and catch per company
        >>> results = {}
        >>> for ticker in ["AAPL", "INVALID", "MSFT"]:
        ...     try:
        ...         results[ticker] = Company(ticker).income_statement(periods=3)
        ...     except Exception as e:
        ...         print(f"Error with {ticker}: {e}")

    See Also:
        - get_revenue_trend() - For single company analysis
        - Company.income_statement() - The underlying method used
    """
    return {
        symbol: Company(symbol).income_statement(periods=periods)
        for symbol in tickers
    }
def filter_by_industry(
    filings: 'Filings',
    sic: Optional[Union[int, List[int]]] = None,
    sic_range: Optional[tuple[int, int]] = None,
    sic_description_contains: Optional[str] = None,
) -> 'Filings':
    """
    Keep only the filings of companies that belong to a given industry.

    The three industry arguments are forwarded to
    ``edgar.reference.get_companies_by_industry``; the ``cik`` column of the
    DataFrame it returns is then used as the CIK filter on ``filings``. An
    empty ``filings`` collection is returned as-is, before any company lookup.

    Args:
        filings: Filings collection to filter (from get_filings() or similar)
        sic: Single SIC code or list (e.g., 2834 or [2834, 2835, 2836])
        sic_range: SIC range tuple (e.g., (7300, 7400)). The original notes
            describe the upper bound as exclusive - confirm against
            get_companies_by_industry().
        sic_description_contains: Text to search for in the SIC description
            (e.g., "software")

    Returns:
        The result of ``filings.filter(cik=[...])`` for the matching
        companies, or ``filings`` itself when it is empty.

    Raises:
        This function performs no argument checks of its own. NOTE(review):
        earlier documentation promised a ValueError when no filter argument
        is given; if that happens, it is raised by
        get_companies_by_industry() - confirm.

    Examples:
        >>> from edgar import get_filings
        >>> from edgar.ai.helpers import filter_by_industry
        >>>
        >>> # Filter filings to pharmaceutical companies
        >>> filings = get_filings(2023, 4, form="10-K")
        >>> pharma_10ks = filter_by_industry(filings, sic=2834)
        >>>
        >>> # Filter by SIC range
        >>> tech_8ks = filter_by_industry(get_filings(2023, 4, form="8-K"),
        ...                               sic_range=(7300, 7400))
        >>>
        >>> # Filter using description search
        >>> software = filter_by_industry(get_filings(2023, 4),
        ...                               sic_description_contains="software")

    See Also:
        - filter_by_company_subset() - Filter using a CompanySubset or DataFrame
        - get_companies_by_industry() - Get company list directly (from edgar.reference)
        - Filings.filter() - The underlying filter method
    """
    from edgar.reference import get_companies_by_industry

    # Nothing to filter: hand the empty collection straight back.
    if len(filings) == 0:
        return filings

    industry_companies = get_companies_by_industry(
        sic=sic,
        sic_range=sic_range,
        sic_description_contains=sic_description_contains,
    )
    cik_list = industry_companies['cik'].tolist()

    # An empty CIK list goes through the same call (filter(cik=[])), which
    # presumably yields an empty collection of the same type.
    return filings.filter(cik=cik_list)
def filter_by_company_subset(
    filings: 'Filings',
    companies: Union['CompanySubset', pd.DataFrame]
) -> 'Filings':
    """
    Keep only the filings whose CIK appears in a set of companies.

    ``companies`` may be a ``CompanySubset`` (its ``get()`` result is used)
    or any object exposing ``columns`` and a ``'cik'`` column, typically a
    pandas DataFrame. The values of that column become the CIK filter
    passed to ``filings.filter``.

    Args:
        filings: Filings collection to filter
        companies: CompanySubset object or pandas DataFrame with 'cik' column

    Returns:
        The result of ``filings.filter(cik=[...])``

    Raises:
        ValueError: If the company table has no 'cik' column

    Examples:
        >>> from edgar import get_filings
        >>> from edgar.reference import CompanySubset
        >>> from edgar.ai.helpers import filter_by_company_subset
        >>>
        >>> filings = get_filings(2023, 4, form="10-K")
        >>>
        >>> # Delaware pharmaceutical companies, sample 10
        >>> companies = (CompanySubset()
        ...              .from_industry(sic=2834)
        ...              .from_state('DE')
        ...              .sample(10, random_state=42))
        >>> pharma_de_filings = filter_by_company_subset(filings, companies)
        >>>
        >>> # Or pass a DataFrame directly
        >>> from edgar.reference import get_pharmaceutical_companies
        >>> pharma_filings = filter_by_company_subset(filings, get_pharmaceutical_companies())

    See Also:
        - filter_by_industry() - Simpler industry-only filtering
        - CompanySubset - Fluent interface for complex filtering (from edgar.reference)
    """
    from edgar.reference import CompanySubset

    # Normalise the input to a table: a CompanySubset is resolved via get().
    table = companies.get() if isinstance(companies, CompanySubset) else companies

    if 'cik' not in table.columns:
        raise ValueError("companies DataFrame must have 'cik' column")

    # An empty CIK list is passed through as filter(cik=[]).
    return filings.filter(cik=table['cik'].tolist())
# ============================================================================
# Company Subset Convenience Functions
# ============================================================================
def get_companies_by_state(states: Union[str, List[str]]) -> pd.DataFrame:
    """
    Get companies by state of incorporation.

    Thin wrapper: forwards ``states`` to
    ``edgar.reference.get_companies_by_state`` and returns its result unchanged.

    Args:
        states: State code(s) (e.g., 'DE' or ['DE', 'NV'])

    Returns:
        DataFrame with companies incorporated in specified state(s).
        Columns: cik, ticker, name, exchange, sic, sic_description,
        state_of_incorporation, state_of_incorporation_description,
        fiscal_year_end, entity_type, ein

    Examples:
        >>> # Delaware companies (most common)
        >>> de_companies = get_companies_by_state('DE')
        >>> print(f"Found {len(de_companies)} Delaware companies")
        >>>
        >>> # Multiple states
        >>> tech_hubs = get_companies_by_state(['DE', 'CA', 'NV'])
        >>> print(tech_hubs[['ticker', 'name', 'state_of_incorporation']].head())

    See Also:
        - filter_by_company_subset() - Filter filings by company subset
        - CompanySubset.from_state() - Fluent interface (from edgar.reference)
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_companies_by_state as _get_by_state
    return _get_by_state(states)
def get_pharmaceutical_companies() -> pd.DataFrame:
    """
    Get all pharmaceutical companies (SIC 2834 - Pharmaceutical Preparations).

    Thin wrapper: calls ``edgar.reference.get_pharmaceutical_companies()``
    and returns its result unchanged.

    Returns:
        DataFrame with pharmaceutical companies and comprehensive metadata.

    Examples:
        >>> pharma = get_pharmaceutical_companies()
        >>> print(f"Found {len(pharma)} pharmaceutical companies")
        >>> print(pharma[['ticker', 'name']].head())

    See Also:
        - get_biotechnology_companies() - Broader biotech category
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_pharmaceutical_companies as _get_pharma
    return _get_pharma()
def get_biotechnology_companies() -> pd.DataFrame:
    """
    Get all biotechnology companies (SIC 2833-2836).

    Thin wrapper: calls ``edgar.reference.get_biotechnology_companies()``
    and returns its result unchanged.

    Returns:
        DataFrame with biotechnology companies and comprehensive metadata.

    Examples:
        >>> biotech = get_biotechnology_companies()
        >>> print(f"Found {len(biotech)} biotechnology companies")

    See Also:
        - get_pharmaceutical_companies() - Narrower pharma category
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_biotechnology_companies as _get_biotech
    return _get_biotech()
def get_software_companies() -> pd.DataFrame:
    """
    Get all software companies (SIC 7371-7379 - Computer Programming and Software).

    Thin wrapper: calls ``edgar.reference.get_software_companies()`` and
    returns its result unchanged.

    Returns:
        DataFrame with software companies and comprehensive metadata.

    Examples:
        >>> software = get_software_companies()
        >>> print(f"Found {len(software)} software companies")

        >>> # Get recent 10-K filings from software companies
        >>> from edgar import get_filings
        >>> filings = get_filings(2023, 4, form="10-K")
        >>> software_10ks = filter_by_company_subset(filings, software)

    See Also:
        - get_semiconductor_companies() - Hardware tech companies
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_software_companies as _get_software
    return _get_software()
def get_semiconductor_companies() -> pd.DataFrame:
    """
    Get all semiconductor companies (SIC 3674 - Semiconductors and Related Devices).

    Thin wrapper: calls ``edgar.reference.get_semiconductor_companies()``
    and returns its result unchanged.

    Returns:
        DataFrame with semiconductor companies and comprehensive metadata.

    Examples:
        >>> semis = get_semiconductor_companies()
        >>> print(f"Found {len(semis)} semiconductor companies")

    See Also:
        - get_software_companies() - Software tech companies
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_semiconductor_companies as _get_semi
    return _get_semi()
def get_banking_companies() -> pd.DataFrame:
    """
    Get all banking companies (SIC 6020-6029 - Commercial Banks).

    Thin wrapper: calls ``edgar.reference.get_banking_companies()`` and
    returns its result unchanged.

    Returns:
        DataFrame with banking companies and comprehensive metadata.

    Examples:
        >>> banks = get_banking_companies()
        >>> print(f"Found {len(banks)} banking companies")

    See Also:
        - get_investment_companies() - Investment/securities firms
        - get_insurance_companies() - Insurance companies
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_banking_companies as _get_banks
    return _get_banks()
def get_investment_companies() -> pd.DataFrame:
    """
    Get all investment companies (SIC 6200-6299 - Security and Commodity Brokers).

    Thin wrapper: calls ``edgar.reference.get_investment_companies()`` and
    returns its result unchanged.

    Returns:
        DataFrame with investment companies and comprehensive metadata.

    Examples:
        >>> investments = get_investment_companies()
        >>> print(f"Found {len(investments)} investment companies")

    See Also:
        - get_banking_companies() - Commercial banks
        - get_insurance_companies() - Insurance companies
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_investment_companies as _get_invest
    return _get_invest()
def get_insurance_companies() -> pd.DataFrame:
    """
    Get all insurance companies (SIC 6300-6399 - Insurance Carriers).

    Thin wrapper: calls ``edgar.reference.get_insurance_companies()`` and
    returns its result unchanged.

    Returns:
        DataFrame with insurance companies and comprehensive metadata.

    Examples:
        >>> insurance = get_insurance_companies()
        >>> print(f"Found {len(insurance)} insurance companies")

    See Also:
        - get_banking_companies() - Commercial banks
        - get_investment_companies() - Investment firms
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_insurance_companies as _get_insurance
    return _get_insurance()
def get_real_estate_companies() -> pd.DataFrame:
    """
    Get all real estate companies (SIC 6500-6599 - Real Estate).

    Thin wrapper: calls ``edgar.reference.get_real_estate_companies()`` and
    returns its result unchanged.

    Returns:
        DataFrame with real estate companies and comprehensive metadata.

    Examples:
        >>> real_estate = get_real_estate_companies()
        >>> print(f"Found {len(real_estate)} real estate companies")

    See Also:
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_real_estate_companies as _get_re
    return _get_re()
def get_oil_gas_companies() -> pd.DataFrame:
    """
    Get all oil and gas companies (SIC 1300-1399 - Oil and Gas Extraction).

    Thin wrapper: calls ``edgar.reference.get_oil_gas_companies()`` and
    returns its result unchanged.

    Returns:
        DataFrame with oil and gas companies and comprehensive metadata.

    Examples:
        >>> oil_gas = get_oil_gas_companies()
        >>> print(f"Found {len(oil_gas)} oil and gas companies")

    See Also:
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_oil_gas_companies as _get_oil_gas
    return _get_oil_gas()
def get_retail_companies() -> pd.DataFrame:
    """
    Get all retail companies (SIC 5200-5999 - Retail Trade).

    Thin wrapper: calls ``edgar.reference.get_retail_companies()`` and
    returns its result unchanged.

    Returns:
        DataFrame with retail companies and comprehensive metadata.

    Examples:
        >>> retail = get_retail_companies()
        >>> print(f"Found {len(retail)} retail companies")

    See Also:
        - filter_by_industry() - Filter filings by industry
    """
    # Imported at call time under an alias, because the reference function
    # has the same name as this wrapper.
    from edgar.reference import get_retail_companies as _get_retail
    return _get_retail()

View File

@@ -0,0 +1,27 @@
"""
Model Context Protocol (MCP) server for EdgarTools.
This module provides MCP server functionality to expose EdgarTools
capabilities to AI agents and assistants like Claude Desktop.
Usage:
# Start the server
python -m edgar.ai
# Or via console script
edgartools-mcp
# Test the server configuration
python -m edgar.ai --test
For configuration and setup instructions, see:
edgar/ai/mcp/docs/MCP_QUICKSTART.md
"""
# Re-export the two server entry points so they can be imported from this
# package directly; they are the package's whole public API.
from edgar.ai.mcp.server import main, test_server
__all__ = [
    "main",
    "test_server",
]

View File

@@ -0,0 +1,467 @@
# EdgarTools MCP Quickstart Guide
This guide helps you get started with EdgarTools MCP server in under 5 minutes.
## Installation
```bash
# Install EdgarTools with AI features
pip install "edgartools[ai]"
```
## Starting the Server
EdgarTools provides two ways to start the MCP server:
### Option 1: Python Module (Recommended)
```bash
python -m edgar.ai
```
### Option 2: Console Script
```bash
edgartools-mcp
```
Both methods work identically and will start the MCP server listening on stdin/stdout.
## Client Configuration
### Claude Desktop
**Step 1: Install Claude Desktop**
- Download from https://claude.ai/download (macOS or Windows)
**Step 2: Configure the Server**
You can configure EdgarTools MCP in two ways:
**Option A: Using Claude Desktop Settings (Easier)**
1. Open Claude Desktop
2. Go to Settings (macOS: `Cmd+,` / Windows: `Ctrl+,`)
3. Navigate to **Developer** tab
4. Click **Edit Config** button
5. This will open `claude_desktop_config.json` in your default editor
**Option B: Edit Configuration File Directly**
Configuration file location:
- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
**Configuration (macOS):**
```json
{
"mcpServers": {
"edgartools": {
"command": "python3",
"args": ["-m", "edgar.ai"],
"env": {
"EDGAR_IDENTITY": "Your Name your.email@example.com"
}
}
}
}
```
**Configuration (Windows):**
```json
{
"mcpServers": {
"edgartools": {
"command": "python",
"args": ["-m", "edgar.ai"],
"env": {
"EDGAR_IDENTITY": "Your Name your.email@example.com"
}
}
}
}
```
**Important:** On macOS, use `python3` (not `python`) as the command. On Windows, use `python`.
**Important Notes:**
- Replace `"Your Name your.email@example.com"` with your actual name and email
- `EDGAR_IDENTITY` tells the SEC who is making the requests; the SEC requires this identification for API access
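- If you put a full path in the configuration (for example as `command`), write it with forward slashes, even on Windows, because backslashes must be escaped in JSON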
**Step 3: Restart and Verify**
1. Save the configuration file
2. Restart Claude Desktop
3. Look for the MCP server indicator (🔨) in the bottom-right corner of the chat input
4. Try asking: "Research Apple Inc with financials"
### Cline (VS Code Extension)
**Configuration File:** `.vscode/cline_mcp_settings.json` in your project
```json
{
"mcpServers": {
"edgartools": {
"command": "python3",
"args": ["-m", "edgar.ai"],
"env": {
"EDGAR_IDENTITY": "Your Name your.email@example.com"
}
}
}
}
```
**Note:** Use `python3` on macOS/Linux, or `python` on Windows.
### Continue.dev
**Configuration File:** `~/.continue/config.json`
```json
{
"mcpServers": {
"edgartools": {
"command": "python3",
"args": ["-m", "edgar.ai"],
"env": {
"EDGAR_IDENTITY": "Your Name your.email@example.com"
}
}
}
}
```
**Note:** Use `python3` on macOS/Linux, or `python` on Windows.
## Available Tools
Once connected, AI agents have access to workflow-oriented tools designed for real-world research tasks:
### Workflow Tools (Recommended)
#### 1. edgar_company_research
Comprehensive company intelligence combining profile, financials, recent activity, and ownership in a single workflow.
**Example prompts:**
- "Research Tesla including financials and recent filings"
- "Give me a detailed analysis of Apple Inc"
- "Show me Microsoft's company profile with ownership data"
**Parameters:**
- `identifier` (required): Company ticker, CIK, or name
- `include_financials` (default: true): Include latest financial statements
- `include_filings` (default: true): Include recent filing activity summary
- `include_ownership` (default: false): Include insider/institutional ownership highlights
- `detail_level` (default: "standard"): Response detail - "minimal", "standard", or "detailed"
**What it provides:**
- Company profile (name, CIK, ticker, industry)
- Latest financial metrics and statements
- Recent filing activity summary
- Ownership highlights (when requested)
#### 2. edgar_analyze_financials
Multi-period financial statement analysis for trend analysis and comparisons.
**Example prompts:**
- "Analyze Apple's income statement for the last 4 years"
- "Show me Tesla's quarterly cash flow for the last 8 quarters"
- "Compare Microsoft's income, balance sheet, and cash flow statements"
**Parameters:**
- `company` (required): Company ticker, CIK, or name
- `periods` (default: 4): Number of periods to analyze
- `annual` (default: true): Annual (true) or quarterly (false) periods
- `statement_types` (default: ["income"]): Statements to include - "income", "balance", "cash_flow"
**What it provides:**
- Multi-period income statements
- Multi-period balance sheets
- Multi-period cash flow statements
- Formatted for AI analysis and comparison
### Basic Tools (Backward Compatibility)
#### 3. edgar_get_company
Get basic company information from SEC filings.
**Example prompts:**
- "Get information about Tesla"
- "Show me Apple's company details"
**Parameters:**
- `identifier` (required): Company ticker, CIK, or name
- `include_financials` (optional): Include latest financial statements
#### 4. edgar_current_filings
Get the most recent SEC filings across all companies.
**Example prompts:**
- "Show me the latest SEC filings"
- "What are the most recent 10-K filings?"
- "Get current 8-K filings"
**Parameters:**
- `limit` (optional): Number of filings to return (default: 20)
- `form_type` (optional): Filter by form type (e.g., "10-K", "10-Q", "8-K")
## Environment Variables
### EDGAR_IDENTITY (Recommended)
The SEC requires proper identification for all API requests. You can configure this in two ways:
**Option 1: In MCP Client Configuration (Recommended)**
Set it in your MCP client config as shown in the examples above:
```json
"env": {
"EDGAR_IDENTITY": "Your Name your.email@example.com"
}
```
**Option 2: Shell Environment Variable**
Add to your `~/.bashrc` or `~/.zshrc`:
```bash
export EDGAR_IDENTITY="Your Name your.email@example.com"
```
**What happens if not set:**
- Server starts with a warning message
- SEC API may rate-limit or return errors
- The server will log helpful instructions for configuring it
**SEC Requirements:**
- Format: "Full Name email@domain.com"
- Must be a valid email you monitor
- Used by SEC to contact you if issues arise with your API usage
## Troubleshooting
### Finding Logs
Claude Desktop logs MCP server activity to help diagnose issues:
**Log Locations:**
- **macOS**: `~/Library/Logs/Claude/`
- Main log: `mcp.log`
- Server-specific: `mcp-server-edgartools.log`
- **Windows**: `%APPDATA%\Claude\logs\`
**Viewing logs:**
```bash
# macOS - watch logs in real-time
tail -f ~/Library/Logs/Claude/mcp-server-edgartools.log
# macOS - view recent errors
tail -50 ~/Library/Logs/Claude/mcp-server-edgartools.log | grep error
```
### "spawn python ENOENT" Error
**Issue:** Claude Desktop logs show `spawn python ENOENT` error
**Where to check:** View logs at `~/Library/Logs/Claude/mcp-server-edgartools.log`
**Cause:** The `python` command is not found in your system PATH. This is the most common issue on macOS.
**Solution:**
1. **Use `python3` instead of `python` (macOS/Linux):**
```json
{
"mcpServers": {
"edgartools": {
"command": "python3",
"args": ["-m", "edgar.ai"]
}
}
}
```
2. **Or specify the full Python path:**
Find your Python path:
```bash
which python3
```
Then use the full path in your configuration:
```json
{
"mcpServers": {
"edgartools": {
"command": "/opt/homebrew/bin/python3",
"args": ["-m", "edgar.ai"]
}
}
}
```
3. **Verify Python is accessible:**
```bash
python3 --version
# Should show: Python 3.10 or higher (the minimum version MCP requires)
```
### Server won't start
**Issue:** `ModuleNotFoundError: No module named 'mcp'`
**Solution:** Install AI dependencies
```bash
pip install edgartools[ai]
# or with pip3
pip3 install edgartools[ai]
```
### Client can't find server
**Issue:** Claude Desktop shows connection error
**Solution:** Verify the command works from terminal first
```bash
python3 -m edgar.ai
# Should show: Starting EdgarTools MCP Server v...
# Press Ctrl+C to stop
```
### Wrong Python version
**Issue:** Server starts but tools don't work
**Solution:** MCP requires Python 3.10+. Check your version:
```bash
python3 --version   # on Windows: python --version
```
If using Python 3.9 or earlier, upgrade Python:
```bash
# macOS with Homebrew
brew install python@3.11
```
Then update your config to use the specific version:
```json
{
  "mcpServers": {
    "edgartools": {
      "command": "/opt/homebrew/bin/python3.11",
      "args": ["-m", "edgar.ai"]
    }
  }
}
```
## Verification
### Quick Test
Before configuring your MCP client, verify the server is working:
```bash
python -m edgar.ai --test
```
**Expected output:**
```
Testing EdgarTools MCP Server Configuration...
✓ EdgarTools v4.18.0 imports successfully
✓ MCP framework available
✓ EDGAR_IDENTITY configured: Your Name your@email.com
✓ Core EdgarTools functionality available
✓ All checks passed - MCP server is ready to run
```
If any checks fail, the test will show specific error messages and installation instructions.
### Full Integration Test
1. **Start the server manually:**
```bash
python -m edgar.ai
```
You should see: `Starting EdgarTools MCP Server v4.18.0`
2. **Configure your MCP client** (see configurations above)
3. **Test in your MCP client:**
Try these example prompts:
- "Research Apple Inc with financials and recent filings"
- "Analyze Tesla's quarterly income statement for the last 4 quarters"
- "Get the latest 10-K filings"
4. **Check server logs:**
The server logs to stderr. Check your MCP client's developer console for any errors.
5. **Verify tool availability:**
In Claude Desktop, look for the MCP indicator (🔨) in the bottom-right corner of the chat input. Clicking it should show available EdgarTools tools.
## Migration from Legacy Setup
If you're currently using the old `run_mcp_server.py` entry point, here's how to migrate:
### Old Configuration (Deprecated):
```json
{
"mcpServers": {
"edgartools": {
"command": "python",
"args": ["/absolute/path/to/edgartools/edgar/ai/run_mcp_server.py"]
}
}
}
```
### New Configuration (macOS):
```json
{
"mcpServers": {
"edgartools": {
"command": "python3",
"args": ["-m", "edgar.ai"],
"env": {
"EDGAR_IDENTITY": "Your Name your@email.com"
}
}
}
}
```
### New Configuration (Windows):
```json
{
"mcpServers": {
"edgartools": {
"command": "python",
"args": ["-m", "edgar.ai"],
"env": {
"EDGAR_IDENTITY": "Your Name your@email.com"
}
}
}
}
```
### Benefits of Migrating:
- ✅ No absolute file paths required
- ✅ Works from any directory
- ✅ Proper SEC identity configuration
- ✅ Simpler configuration
- ✅ Better error messages
- ✅ Verification tool support (`--test` flag)
**Note:** The old entry point still works but shows a deprecation warning. It will be removed in a future version.
## Next Steps
- Read the [full MCP documentation](../../../docs-internal/features/edgartools-mcp-ai-support.md) for advanced features
- See [AI package structure](../../../docs-internal/features/ai-mcp-package-structure-plan.md) for architecture details
- Explore example notebooks showing MCP workflows
## Support
- **Issues:** https://github.com/dgunning/edgartools/issues
- **Discussions:** https://github.com/dgunning/edgartools/discussions
- **Documentation:** https://dgunning.github.io/edgartools/

View File

@@ -0,0 +1,394 @@
#!/usr/bin/env python3
"""
EdgarTools MCP Server
MCP (Model Context Protocol) server providing AI agents access to SEC filing data.
This module provides the main entry point for the MCP server.
Usage:
python -m edgar.ai.mcp # Via module
edgartools-mcp # Via console script
"""
import asyncio
import logging
import os
from typing import Any
from mcp import Resource, Tool
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
from mcp.server.stdio import stdio_server
from mcp.types import TextContent
# Logging: INFO-level records carrying timestamp, logger name and level.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("edgartools-mcp")
def setup_edgar_identity():
    """Apply the SEC identity found in the EDGAR_IDENTITY environment variable.

    When the variable holds a value it is handed to ``edgar.set_identity`` and
    the configured identity is logged. When it is missing or empty, a warning
    explaining how to set it is logged and nothing else happens. Any exception
    raised along the way (including a failing ``edgar`` import) is logged as an
    error and not propagated.
    """
    try:
        from edgar import set_identity

        configured = os.environ.get('EDGAR_IDENTITY')
        if configured:
            set_identity(configured)
            logger.info(f"SEC identity configured: {configured}")
        else:
            # No identity available: tell the operator how to provide one.
            setup_hint = (
                "EDGAR_IDENTITY environment variable not set. "
                "The SEC requires proper identification for API requests.\n"
                "Add to your MCP client configuration:\n"
                ' "env": {"EDGAR_IDENTITY": "Your Name your.email@example.com"}\n'
                "Or set in your shell: export EDGAR_IDENTITY=\"Your Name your.email@example.com\""
            )
            logger.warning(setup_hint)
    except Exception as exc:
        logger.error(f"Error setting up EDGAR identity: {exc}")
# Create the MCP server instance, registered under the name "edgartools".
# The handlers in this module attach themselves to it through its decorators
# (@app.list_tools(), @app.call_tool(), @app.list_resources(), @app.read_resource()),
# and main() runs it.
app = Server("edgartools")
@app.list_tools()
async def list_tools() -> list[Tool]:
    """Return the descriptors (name, description, JSON input schema) of all tools.

    Four tools are described: company research, multi-period financial
    analysis, industry overview, and industry company comparison.
    """

    def object_schema(properties: dict[str, Any], required: list[str]) -> dict[str, Any]:
        # Every tool takes a JSON object; only properties/required differ.
        return {
            "type": "object",
            "properties": properties,
            "required": required
        }

    def industry_property() -> dict[str, Any]:
        # Built fresh on each call so the two industry tools share no state.
        return {
            "type": "string",
            "enum": [
                "pharmaceuticals", "biotechnology", "software",
                "semiconductors", "banking", "investment",
                "insurance", "real_estate", "oil_gas", "retail"
            ],
            "description": "Industry sector to analyze"
        }

    company_research = Tool(
        name="edgar_company_research",
        description="Get company overview and background. Returns profile, 3-year financial trends, and recent filing activity. Use this for initial company research or to get a snapshot of recent performance.",
        inputSchema=object_schema(
            {
                "identifier": {
                    "type": "string",
                    "description": "Company ticker (AAPL), CIK (0000320193), or name (Apple Inc)"
                },
                "include_financials": {
                    "type": "boolean",
                    "description": "Include 3-year income statement showing revenue and profit trends",
                    "default": True
                },
                "include_filings": {
                    "type": "boolean",
                    "description": "Include summary of last 5 SEC filings",
                    "default": True
                },
                "include_ownership": {
                    "type": "boolean",
                    "description": "Include insider and institutional ownership data (currently not implemented)",
                    "default": False
                },
                "detail_level": {
                    "type": "string",
                    "enum": ["minimal", "standard", "detailed"],
                    "description": "Response detail: 'minimal' (key metrics only), 'standard' (balanced), 'detailed' (comprehensive data)",
                    "default": "standard"
                }
            },
            ["identifier"],
        ),
    )

    analyze_financials = Tool(
        name="edgar_analyze_financials",
        description="Detailed financial statement analysis across multiple periods. Use this for trend analysis, growth calculations, or comparing financial performance over time.",
        inputSchema=object_schema(
            {
                "company": {
                    "type": "string",
                    "description": "Company ticker (TSLA), CIK (0001318605), or name (Tesla Inc)"
                },
                "periods": {
                    "type": "integer",
                    "description": "Number of periods: 4-5 for trends, 8-10 for patterns (max 10)",
                    "default": 4
                },
                "annual": {
                    "type": "boolean",
                    "description": "Use annual periods (true) for long-term trends and year-over-year comparisons, or quarterly periods (false) for recent performance and current earnings. Quarterly provides more recent data but may show seasonal volatility.",
                    "default": True
                },
                "statement_types": {
                    "type": "array",
                    "items": {"type": "string", "enum": ["income", "balance", "cash_flow"]},
                    "description": "Statements to include: 'income' (revenue, profit, growth), 'balance' (assets, liabilities, equity), 'cash_flow' (operating, investing, financing cash flows)",
                    "default": ["income"]
                }
            },
            ["company"],
        ),
    )

    industry_overview = Tool(
        name="edgar_industry_overview",
        description="Get overview of an industry sector including company count, major players, and aggregate metrics. Use this to understand industry landscape before diving into specific companies.",
        inputSchema=object_schema(
            {
                "industry": industry_property(),
                "include_top_companies": {
                    "type": "boolean",
                    "description": "Include list of major companies in the sector",
                    "default": True
                },
                "limit": {
                    "type": "integer",
                    "description": "Number of top companies to show (by filing activity)",
                    "default": 10
                }
            },
            ["industry"],
        ),
    )

    compare_industry_companies = Tool(
        name="edgar_compare_industry_companies",
        description="Compare financial performance of companies within an industry sector. Automatically selects top companies or accepts custom company list for side-by-side financial comparison.",
        inputSchema=object_schema(
            {
                "industry": industry_property(),
                "companies": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Optional: Specific tickers to compare (e.g., ['AAPL', 'MSFT', 'GOOGL']). If omitted, uses top companies by market presence.",
                    "default": None
                },
                "limit": {
                    "type": "integer",
                    "description": "Number of companies to compare if not specified (default 5, max 10)",
                    "default": 5
                },
                "periods": {
                    "type": "integer",
                    "description": "Number of periods for comparison (default 3)",
                    "default": 3
                },
                "annual": {
                    "type": "boolean",
                    "description": "Annual (true) or quarterly (false) comparison",
                    "default": True
                }
            },
            ["industry"],
        ),
    )

    return [
        company_research,
        analyze_financials,
        industry_overview,
        compare_industry_companies,
    ]
@app.call_tool()
async def call_tool(name: str, arguments: dict[str, Any] | None) -> list[TextContent]:
    """Dispatch a tool invocation to the matching handler.

    Args:
        name: Name of the tool being called.
        arguments: Tool arguments; ``None`` is treated as an empty dict.

    Returns:
        The handler's result. If the tool name is unknown, or resolving or
        running the handler raises, the error is logged and returned as a
        single "Error: ..." text item instead of being raised.
    """
    tool_args = {} if arguments is None else arguments

    def resolve_handler():
        # Handler modules are imported lazily, only for the requested tool.
        if name == "edgar_company_research":
            from edgar.ai.mcp.tools.company_research import handle_company_research
            return handle_company_research
        if name == "edgar_analyze_financials":
            from edgar.ai.mcp.tools.financial_analysis import handle_analyze_financials
            return handle_analyze_financials
        if name == "edgar_industry_overview":
            from edgar.ai.mcp.tools.industry_analysis import handle_industry_overview
            return handle_industry_overview
        if name == "edgar_compare_industry_companies":
            from edgar.ai.mcp.tools.industry_analysis import handle_compare_industry_companies
            return handle_compare_industry_companies
        raise ValueError(f"Unknown tool: {name}")

    try:
        handler = resolve_handler()
        return await handler(tool_args)
    except Exception as exc:
        logger.error("Error in tool %s: %s", name, exc)
        failure = TextContent(type="text", text=f"Error: {str(exc)}")
        return [failure]
@app.list_resources()
async def list_resources() -> list[Resource]:
    """Return the resources this server offers: currently one quickstart guide."""
    quickstart_guide = Resource(
        mimeType="text/markdown",
        description="Quick start guide for using EdgarTools",
        name="EdgarTools Quickstart Guide",
        uri="edgartools://docs/quickstart",
    )
    return [quickstart_guide]
@app.read_resource()
async def read_resource(uri: str) -> str:
    """Return the content of a resource advertised by ``list_resources``.

    Args:
        uri: URI of the requested resource. It is normalised with ``str()``
            before matching, so a plain string and a URL object are both
            accepted.

    Returns:
        Markdown text of the quickstart guide.

    Raises:
        ValueError: If ``uri`` does not name a known resource.
    """
    # NOTE(review): the MCP SDK appears to pass a URL object (pydantic AnyUrl)
    # here rather than a str - confirm against the installed SDK version. Such
    # an object does not compare equal to a string literal, which would make
    # every read fail with "Unknown resource"; str() is harmless for both.
    requested = str(uri)
    if requested == "edgartools://docs/quickstart":
        # The tool list below mirrors the tools registered in list_tools().
        return """# EdgarTools Quickstart
## Basic Usage
```python
from edgar import Company, get_current_filings
# Get company information
company = Company("AAPL")
print(f"{company.name} - CIK: {company.cik}")
# Get filings
filings = company.get_filings(form="10-K", limit=5)
for filing in filings:
    print(f"{filing.form} - {filing.filing_date}")
# Get current filings across all companies
current = get_current_filings(limit=20)
for filing in current.data.to_pylist():
    print(f"{filing['company']} - {filing['form']}")
```
## Available Tools
- **edgar_company_research**: Company profile, financial trends, and recent filings
- **edgar_analyze_financials**: Multi-period financial statement analysis
- **edgar_industry_overview**: Overview of an industry sector and its major companies
- **edgar_compare_industry_companies**: Side-by-side financials for companies in a sector
## Example Queries
- "Research Apple Inc including recent financials and filings"
- "Analyze Tesla's quarterly income statement for the last 4 quarters"
- "Give me an overview of the semiconductors industry"
"""
    raise ValueError(f"Unknown resource: {uri}")
def main():
    """Start the MCP server on the stdio transport and block until it exits.

    The package version is read from ``edgar.__about__`` and reported as the
    server version. A KeyboardInterrupt is logged and swallowed; any other
    exception is logged with its traceback and re-raised.
    """
    try:
        # The server advertises the package version as its own version.
        from edgar.__about__ import __version__

        # Pick up the SEC identity from the environment before serving.
        setup_edgar_identity()

        async def serve_over_stdio():
            """Run the server over stdin/stdout until the streams close."""
            logger.info(f"Starting EdgarTools MCP Server v{__version__}")
            async with stdio_server() as (incoming, outgoing):
                init_options = InitializationOptions(
                    server_name="edgartools",
                    server_version=__version__,
                    capabilities=app.get_capabilities(
                        notification_options=NotificationOptions(),
                        experimental_capabilities={},
                    ),
                )
                await app.run(incoming, outgoing, init_options)

        asyncio.run(serve_over_stdio())
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as exc:
        logger.error(f"Server error: {exc}", exc_info=True)
        raise
def test_server():
    """Run configuration self-checks and print a human-readable report.

    The checks are: EdgarTools can be imported, the MCP framework can be
    imported, EDGAR_IDENTITY is set (a missing value only produces an
    advisory, it does not fail the run), and ``get_current_filings`` can be
    imported from ``edgar``.

    Returns:
        bool: True if all checks pass, False otherwise
    """
    print("Testing EdgarTools MCP Server Configuration...\n")
    failed_checks = 0

    # Check 1: the EdgarTools package and its version metadata import.
    try:
        from edgar import Company
        from edgar.__about__ import __version__
    except ImportError as exc:
        print(f"✗ EdgarTools import error: {exc}")
        print(" Install with: pip install edgartools")
        failed_checks += 1
    else:
        print(f"✓ EdgarTools v{__version__} imports successfully")

    # Check 2: the MCP framework imports.
    try:
        from mcp.server import Server
    except ImportError as exc:
        print(f"✗ MCP framework not installed: {exc}")
        print(" Install with: pip install edgartools[ai]")
        failed_checks += 1
    else:
        print("✓ MCP framework available")

    # Check 3: SEC identity; absence is advisory only.
    configured_identity = os.environ.get('EDGAR_IDENTITY')
    if not configured_identity:
        print("⚠ EDGAR_IDENTITY not set (recommended)")
        print(" Set with: export EDGAR_IDENTITY=\"Your Name your@email.com\"")
        print(" Or configure in MCP client's env settings")
    else:
        print(f"✓ EDGAR_IDENTITY configured: {configured_identity}")

    # Check 4: a core EdgarTools entry point imports.
    try:
        from edgar import get_current_filings
    except Exception as exc:
        print(f"✗ EdgarTools functionality check failed: {exc}")
        failed_checks += 1
    else:
        print("✓ Core EdgarTools functionality available")

    # Summary
    print()
    if failed_checks:
        print("✗ Some checks failed - please fix the issues above")
        return False

    print("✓ All checks passed - MCP server is ready to run")
    print("\nTo start the server:")
    print(" python -m edgar.ai")
    print(" or")
    print(" edgartools-mcp")
    return True
if __name__ == "__main__":
    import sys

    # `--test` / `-t` runs the configuration self-check and exits with
    # status 0 on success, 1 on failure; otherwise the server is started.
    wants_self_test = any(flag in sys.argv for flag in ("--test", "-t"))
    if not wants_self_test:
        main()
    else:
        sys.exit(0 if test_server() else 1)

View File

@@ -0,0 +1,15 @@
"""
EdgarTools MCP Tool Handlers
This module contains workflow-oriented tool handlers for the MCP server.
"""
from edgar.ai.mcp.tools.utils import (
check_output_size,
format_error_with_suggestions,
)
# Public API of this package: the helpers imported from
# edgar.ai.mcp.tools.utils above are re-exported under these names.
__all__ = [
    "check_output_size",
    "format_error_with_suggestions",
]

View File

@@ -0,0 +1,192 @@
"""
Company Research Tool Handler
Provides comprehensive company intelligence including profile,
financials, recent activity, and ownership information.
"""
import logging
from typing import Any
from mcp.types import TextContent
from edgar import Company
from edgar.ai.mcp.tools.utils import (
build_company_profile,
check_output_size,
format_error_with_suggestions,
)
logger = logging.getLogger(__name__)
async def handle_company_research(args: dict[str, Any]) -> list[TextContent]:
    """
    Build a one-call research report for a single company.

    The report always starts with the company profile and then adds, in
    order, each optional section that was requested: latest financials,
    recent filings, ownership highlights. A section that raises is replaced
    by a "Not available" line; a section that yields no text is omitted.

    Args:
        args: Tool arguments containing:
            - identifier (required): Company ticker, CIK, or name
            - include_financials (default True): Include latest financials
            - include_filings (default True): Include recent filing summary
            - include_ownership (default False): Include ownership highlights
            - detail_level (default "standard"): minimal/standard/detailed

    Returns:
        List containing one TextContent with the report, or with an error
        message when the identifier is missing or the lookup fails
    """
    identifier = args.get("identifier")
    detail_level = args.get("detail_level", "standard")

    # Each entry: (requested?, producer, heading, log wording, failure label)
    optional_sections = (
        (
            args.get("include_financials", True),
            lambda co: extract_latest_financials(co, detail_level),
            "Latest Financials",
            "financials",
            "Financials",
        ),
        (
            args.get("include_filings", True),
            lambda co: recent_filing_summary(co, detail_level),
            "Recent Filings",
            "filings",
            "Recent Filings",
        ),
        (
            args.get("include_ownership", False),
            lambda co: ownership_highlights(co),
            "Ownership Highlights",
            "ownership",
            "Ownership",
        ),
    )

    if not identifier:
        missing = TextContent(type="text", text="Error: identifier parameter is required")
        return [missing]

    try:
        company = Company(identifier)

        # The profile is always the first part of the report.
        chunks = [build_company_profile(company, detail_level)]

        for requested, produce, heading, log_noun, failure_label in optional_sections:
            if not requested:
                continue
            try:
                section_text = produce(company)
                if section_text:
                    chunks.append(f"\n\n{heading}:")
                    chunks.append(section_text)
            except Exception as exc:
                logger.warning(f"Could not retrieve {log_noun}: {exc}")
                chunks.append(f"\n\n{failure_label}: Not available ({str(exc)})")

        # Join the parts, then cap the size so the reply stays within limits.
        report = check_output_size("\n".join(chunks))
        return [TextContent(type="text", text=report)]
    except Exception as exc:
        logger.error(f"Error in company research: {exc}", exc_info=True)
        return [TextContent(type="text", text=format_error_with_suggestions(exc))]
def extract_latest_financials(company: Any, detail_level: str = "standard") -> str:
    """
    Extract latest financial information for a company.

    Requests a three-period annual income statement in concise format and
    returns its LLM-oriented text rendering.

    Args:
        company: Company object
        detail_level: Level of detail to include. Accepted for interface
            compatibility; every level currently yields the same output.

    Returns:
        Formatted financial summary, or an empty string if the statement
        could not be retrieved (the failure is logged as a warning)
    """
    try:
        # Three annual periods give enough history for a trend.
        statement = company.income_statement(periods=3, annual=True, concise_format=True)
        # TODO: Extract specific key metrics for detail_level == "minimal"
        # once we understand the API better.
        return statement.to_llm_string()
    except Exception as e:
        logger.warning("Could not extract financials: %s", e)
        return ""
def recent_filing_summary(company: Any, detail_level: str = "standard") -> str:
    """
    Summarise a company's five most recent filings as a bullet list.

    Args:
        company: Company object
        detail_level: "minimal" gives one compact line per filing; any other
            value also adds the filing description when one is present

    Returns:
        Formatted filing summary, "No recent filings found" when there are
        none, or an empty string if retrieval fails (logged as a warning)
    """
    try:
        latest = company.get_filings(limit=5)
        if not latest:
            return "No recent filings found"

        compact = detail_level == "minimal"
        lines = []
        for entry in latest:
            if compact:
                lines.append(f"- {entry.form} ({entry.filing_date})")
                continue
            lines.append(f"- {entry.form} - {entry.filing_date}")
            # The description is optional; skip it when absent or empty.
            note = getattr(entry, 'description', None)
            if note:
                lines.append(f" {note}")
        return "\n".join(lines)
    except Exception as exc:
        logger.warning(f"Could not retrieve filings: {exc}")
        return ""
def ownership_highlights(company: Any) -> str:
    """
    Placeholder for insider/institutional ownership highlights.

    Args:
        company: Company object (not used yet)

    Returns:
        A fixed message stating that the feature is not implemented
    """
    # TODO: Implement once we understand ownership data access
    # This might require analyzing Form 4 (insider) and 13F (institutional) filings
    placeholder = "Ownership data: Feature not yet implemented"
    logger.info("Ownership highlights not yet implemented")
    return placeholder

View File

@@ -0,0 +1,106 @@
"""
Financial Analysis Tool Handler
Provides multi-period financial statement analysis.
"""
import logging
from typing import Any
from mcp.types import TextContent
from edgar import Company
from edgar.ai.mcp.tools.utils import (
check_output_size,
format_error_with_suggestions,
)
logger = logging.getLogger(__name__)
async def handle_analyze_financials(args: dict[str, Any]) -> list[TextContent]:
    """
    Produce a multi-period financial statement report for one company.

    Each requested statement is fetched through the matching Company method
    (income_statement, balance_sheet, cash_flow) and rendered as text. A
    statement that fails is replaced by a "Not available" line so the other
    statements are still returned.

    Args:
        args: Tool arguments containing:
            - company (required): Company ticker, CIK, or name
            - periods (default 4): Number of periods to analyze
            - annual (default True): Annual (true) or quarterly (false)
            - statement_types (default ["income"]): Statements to include

    Returns:
        List containing one TextContent with the report, or with an error
        message when the company is missing or the lookup fails
    """
    company_id = args.get("company")
    periods = args.get("periods", 4)
    annual = args.get("annual", True)
    statement_types = args.get("statement_types", ["income"])

    if not company_id:
        return [TextContent(type="text", text="Error: company parameter is required")]

    # (requested key, Company method, banner title, log wording, failure label)
    statement_specs = (
        ("income", "income_statement", "Income Statement", "income statement", "Income Statement"),
        ("balance", "balance_sheet", "Balance Sheet", "balance sheet", "Balance Sheet"),
        ("cash_flow", "cash_flow", "Cash Flow Statement", "cash flow", "Cash Flow"),
    )

    try:
        company = Company(company_id)

        frequency = 'Annual' if annual else 'Quarterly'
        lines = [
            f"Financial Analysis: {company.name}",
            f"Periods: {periods} {frequency}",
            "",
        ]

        for key, method_name, banner, log_wording, failure_label in statement_specs:
            if key not in statement_types:
                continue
            try:
                fetch = getattr(company, method_name)
                statement = fetch(periods=periods, annual=annual, concise_format=True)
                # Banner first, rendering second: if rendering raises, the
                # banner stays in place and the failure line follows it.
                lines.append(f"=== {banner} ===")
                lines.append(statement.to_llm_string())
                lines.append("")
            except Exception as exc:
                logger.warning(f"Could not retrieve {log_wording}: {exc}")
                lines.append(f"{failure_label}: Not available ({str(exc)})")
                lines.append("")

        # Larger limit for financials
        report = check_output_size("\n".join(lines), max_tokens=3000)
        return [TextContent(type="text", text=report)]
    except Exception as exc:
        logger.error(f"Error in financial analysis: {exc}", exc_info=True)
        return [TextContent(type="text", text=format_error_with_suggestions(exc))]

View File

@@ -0,0 +1,238 @@
"""
Industry Analysis Tool Handlers
Provides industry sector analysis and competitive benchmarking capabilities.
"""
import logging
from typing import Any
from mcp.types import TextContent
from edgar import Company
from edgar.ai.mcp.tools.utils import (
check_output_size,
format_error_with_suggestions,
)
logger = logging.getLogger(__name__)
# Maps each supported industry key to the name of the function in
# edgar.ai.helpers that the handlers below look up with getattr().
INDUSTRY_FUNCTIONS = dict(
    pharmaceuticals="get_pharmaceutical_companies",
    biotechnology="get_biotechnology_companies",
    software="get_software_companies",
    semiconductors="get_semiconductor_companies",
    banking="get_banking_companies",
    investment="get_investment_companies",
    insurance="get_insurance_companies",
    real_estate="get_real_estate_companies",
    oil_gas="get_oil_gas_companies",
    retail="get_retail_companies",
)
async def handle_industry_overview(args: dict[str, Any]) -> list[TextContent]:
    """
    Handle industry overview tool requests.

    Builds a Markdown overview of an industry sector containing:
    - Total company count
    - SIC code(s)
    - Industry description (taken from the first company row)
    - Major public companies (rows that have a ticker), when requested

    Args:
        args: Tool arguments containing:
            - industry (required): Industry sector name
            - include_top_companies (default True): Include major companies
            - limit (default 10): Number of top companies to show

    Returns:
        List containing TextContent with industry overview, or with an error
        message when the industry is missing, unknown, or the lookup fails
    """
    industry = args.get("industry")
    include_top = args.get("include_top_companies", True)
    limit = args.get("limit", 10)

    if not industry:
        return [TextContent(
            type="text",
            text="Error: industry parameter is required"
        )]

    if industry not in INDUSTRY_FUNCTIONS:
        return [TextContent(
            type="text",
            text=f"Error: Unknown industry '{industry}'. Must be one of: {', '.join(INDUSTRY_FUNCTIONS.keys())}"
        )]

    try:
        # Local import: only needed to detect missing cell values below.
        import pandas as pd

        # Import and call the appropriate industry function
        from edgar.ai import helpers
        function_name = INDUSTRY_FUNCTIONS[industry]
        get_companies = getattr(helpers, function_name)
        companies = get_companies()

        def display(value: Any) -> Any:
            # Plain truthiness is not enough for cell values: NaN is truthy
            # (it would print as "nan") and pd.NA raises when used as a bool.
            if pd.isna(value) or not value:
                return 'N/A'
            return value

        # Build response
        response_parts = [
            f"# {industry.replace('_', ' ').title()} Industry Overview",
            "",
            f"**Total Companies**: {len(companies):,}",
        ]

        # Get unique SIC codes
        sic_codes = sorted(companies['sic'].unique().tolist())
        if len(sic_codes) == 1:
            response_parts.append(f"**SIC Code**: {sic_codes[0]}")
        else:
            response_parts.append(f"**SIC Codes**: {', '.join(map(str, sic_codes))}")

        # Get primary description (from first company)
        if len(companies) > 0 and 'sic_description' in companies.columns:
            primary_desc = companies['sic_description'].iloc[0]
            response_parts.append(f"**Description**: {primary_desc}")

        response_parts.append("")

        # Add major companies if requested
        if include_top and len(companies) > 0:
            # Filter to companies with tickers (publicly traded)
            public = companies[companies['ticker'].notna()].copy()
            if len(public) > 0:
                response_parts.append("## Major Public Companies")
                response_parts.append("")
                # Show top N companies
                for _, row in public.head(limit).iterrows():
                    ticker = display(row['ticker'])
                    exchange = display(row['exchange'])
                    response_parts.append(
                        f"- **{ticker}** - {row['name']} ({exchange})"
                    )
            else:
                response_parts.append("*No public companies found in this sector*")

        # Combine response
        response_text = "\n".join(response_parts)

        # Check output size
        response_text = check_output_size(response_text)

        return [TextContent(type="text", text=response_text)]
    except Exception as e:
        logger.error(f"Error in industry overview: {e}", exc_info=True)
        return [TextContent(
            type="text",
            text=format_error_with_suggestions(e)
        )]
async def handle_compare_industry_companies(args: dict[str, Any]) -> list[TextContent]:
    """
    Handle industry company comparison tool requests.

    Compares companies within an industry sector by rendering each selected
    company's income statement. A company whose statement cannot be
    retrieved gets a "not available" line instead of aborting the report.

    Args:
        args: Tool arguments containing:
            - industry (required): Industry sector name
            - companies (optional): Specific tickers to compare. Matching
              ignores case and surrounding whitespace; a single ticker given
              as a string is accepted.
            - limit (default 5): Number of companies if not specified.
              Clamped to 1..10, the maximum stated in the tool schema;
              a non-integer value falls back to the default.
            - periods (default 3): Number of periods for comparison
            - annual (default True): Annual (true) or quarterly (false)

    Returns:
        List containing TextContent with comparative analysis, or with an
        error message when the request is invalid or the lookup fails
    """
    industry = args.get("industry")
    company_tickers = args.get("companies")
    limit = args.get("limit", 5)
    periods = args.get("periods", 3)
    annual = args.get("annual", True)

    if not industry:
        return [TextContent(
            type="text",
            text="Error: industry parameter is required"
        )]

    if industry not in INDUSTRY_FUNCTIONS:
        return [TextContent(
            type="text",
            text=f"Error: Unknown industry '{industry}'. Must be one of: {', '.join(INDUSTRY_FUNCTIONS.keys())}"
        )]

    try:
        # Import and call the appropriate industry function
        from edgar.ai import helpers
        function_name = INDUSTRY_FUNCTIONS[industry]
        get_companies = getattr(helpers, function_name)
        companies = get_companies()

        # Only rows that actually carry a ticker can be selected or compared.
        public = companies[companies['ticker'].notna()].copy()

        # Select companies
        if company_tickers:
            # A lone ticker passed as a string would otherwise break isin().
            if isinstance(company_tickers, str):
                company_tickers = [company_tickers]
            # Normalise so "aapl" and " AAPL " both match "AAPL".
            wanted = {
                str(ticker).strip().upper()
                for ticker in company_tickers
                if str(ticker).strip()
            }
            matches = public['ticker'].astype(str).str.upper().isin(wanted)
            selected = public[matches].copy()
            if len(selected) == 0:
                return [TextContent(
                    type="text",
                    text=f"Error: None of the specified tickers found in {industry} industry"
                )]
        else:
            # Use top N companies with tickers
            if len(public) == 0:
                return [TextContent(
                    type="text",
                    text=f"Error: No public companies found in {industry} industry"
                )]
            # Every selected company triggers its own statement fetch in the
            # loop below, so enforce the maximum the tool schema documents.
            try:
                limit = int(limit)
            except (TypeError, ValueError):
                limit = 5
            limit = max(1, min(limit, 10))
            selected = public.head(limit)

        # Compare financials
        response_parts = [
            f"# {industry.replace('_', ' ').title()} Industry Comparison",
            "",
            f"Comparing {len(selected)} companies over {periods} {'annual' if annual else 'quarterly'} periods",
            "",
        ]

        for _, row in selected.iterrows():
            ticker = row['ticker']
            try:
                company = Company(ticker)
                stmt = company.income_statement(
                    periods=periods,
                    annual=annual,
                    concise_format=True
                )
                response_parts.append(f"## {ticker} - {row['name']}")
                response_parts.append("")
                response_parts.append(stmt.to_llm_string())
                response_parts.append("")
            except Exception as e:
                logger.warning(f"Could not get financials for {ticker}: {e}")
                response_parts.append(f"## {ticker} - {row['name']}")
                response_parts.append(f"*Financial data not available: {str(e)}*")
                response_parts.append("")

        # Combine response
        response_text = "\n".join(response_parts)

        # Check output size (larger limit for comparative data)
        response_text = check_output_size(response_text, max_tokens=5000)

        return [TextContent(type="text", text=response_text)]
    except Exception as e:
        logger.error(f"Error in industry comparison: {e}", exc_info=True)
        return [TextContent(
            type="text",
            text=format_error_with_suggestions(e)
        )]

View File

@@ -0,0 +1,137 @@
"""
Utility functions for MCP tool handlers.
Provides helper functions for output management, error handling,
and data formatting for MCP responses.
"""
import logging
from typing import Any
logger = logging.getLogger(__name__)
def check_output_size(data: str, max_tokens: int = 2000) -> str:
    """
    Cap a text response at an approximate token budget.

    The token count is estimated as one token per four characters. Text
    within the budget is returned untouched; longer text is cut to 90% of
    the budget's character equivalent, a truncation notice is appended, and
    a warning is logged.

    Args:
        data: The text data to check
        max_tokens: Maximum allowed tokens (default: 2000)

    Returns:
        Original data if under limit, truncated data otherwise
    """
    # Rough estimation: 1 token ≈ 4 characters
    approx_tokens = len(data) / 4
    if approx_tokens <= max_tokens:
        return data

    # Simple truncation with a notice
    # TODO: Implement smarter summarization in future
    keep_chars = int(max_tokens * 4 * 0.9)  # 90% of limit to be safe
    shortened = data[:keep_chars]
    logger.warning(f"Output truncated: {int(approx_tokens)} tokens -> {max_tokens} tokens")
    return f"{shortened}\n\n... (output truncated to stay within token limit)"
def format_error_with_suggestions(error: Exception) -> str:
    """
    Render an exception as a plain-text report with numbered suggestions.

    The suggestions are chosen by the exact class name of ``error``
    (``type(error).__name__``); base classes are not consulted. Class names
    without a dedicated entry receive a generic set of suggestions.

    Args:
        error: The exception that occurred

    Returns:
        Newline-joined text: the error message, the error type, a blank
        line, a "Suggestions:" heading and one numbered line per suggestion
    """
    kind = type(error).__name__

    # Remediation hints keyed by exception class name.
    known_fixes = {
        "CompanyNotFound": [
            "Try searching by CIK instead of ticker",
            "Use the full company name",
            "Check spelling of ticker symbol",
        ],
        "NoFinancialsAvailable": [
            "Company may not have filed recent 10-K/10-Q",
            "Try include_financials=False for basic info",
            "Check filing history with edgar_market_monitor tool",
        ],
        "FileNotFoundError": [
            "The requested filing may not be available",
            "Try a different form type or date range",
            "Verify the company has filed this type of document",
        ],
        "HTTPError": [
            "SEC EDGAR website may be temporarily unavailable",
            "Check your internet connection",
            "Try again in a few moments",
        ],
        "ValueError": [
            "Check that all required parameters are provided",
            "Verify parameter formats (e.g., valid ticker symbols)",
            "Review the tool's parameter documentation",
        ],
    }
    # Used for every class name that has no entry above.
    fallback = [
        "Try rephrasing your request",
        "Check parameter values",
        "Consult the tool documentation",
    ]
    hints = known_fixes[kind] if kind in known_fixes else fallback

    header = [
        f"Error: {str(error)}",
        f"Error Type: {kind}",
        "",
        "Suggestions:",
    ]
    numbered = [f"{position}. {hint}" for position, hint in enumerate(hints, 1)]
    return "\n".join(header + numbered)
def build_company_profile(company: Any, detail_level: str = "standard") -> str:
    """
    Build a company profile summary.

    ``company.name`` and ``company.cik`` are always read and must exist.
    ``tickers``, ``sic_description`` and ``description`` are optional: each
    line is emitted only when the attribute is present *and* truthy, so a
    missing or ``None`` value never shows up as e.g. "Industry: None".

    Args:
        company: Company object
        detail_level: Level of detail (minimal/standard/detailed). Any value
            other than "standard" or "detailed" yields the minimal profile
            (name, CIK and first ticker).

    Returns:
        Formatted company profile text, one field per line
    """
    parts = [f"Company: {company.name}", f"CIK: {company.cik}"]

    # Only the first ticker is shown.
    tickers = getattr(company, "tickers", None)
    if tickers:
        parts.append(f"Ticker: {tickers[0]}")

    # Industry is included from the "standard" level upwards.
    if detail_level in ("standard", "detailed"):
        industry = getattr(company, "sic_description", None)
        if industry:
            parts.append(f"Industry: {industry}")

    # The description is separated from the header fields by a blank line.
    if detail_level == "detailed":
        description = getattr(company, "description", None)
        if description:
            parts.append(f"\nDescription: {description}")

    return "\n".join(parts)

View File

@@ -0,0 +1,63 @@
"""
EdgarTools AI Skills - Skill discovery and management.
Skills are self-contained packages of documentation and helper functions
that enable AI agents to perform domain-specific tasks with EdgarTools.
"""
from edgar.ai.skills.base import BaseSkill
from edgar.ai.skills.core import edgartools_skill, EdgarToolsSkill
# Public API of this module: the skill base class, the built-in skill class
# and its shared instance (all imported above), plus the two lookup
# functions defined below.
__all__ = [
'BaseSkill',
'EdgarToolsSkill',
'edgartools_skill',
'list_skills',
'get_skill',
]
def list_skills() -> list:
    """
    Return the skills known to this module.

    A new list is built on every call, so callers may mutate the result
    without affecting later calls.

    Returns:
        List of BaseSkill instances (currently just the built-in
        EdgarTools skill)

    Example:
        >>> from edgar.ai.skills import list_skills
        >>> skills = list_skills()
        >>> for skill in skills:
        ...     print(f"{skill.name}: {skill.description}")
    """
    # Only the built-in skill exists today; skills contributed by external
    # packages would be added to this list.
    available = [edgartools_skill]
    return available
def get_skill(name: str) -> BaseSkill:
    """
    Look up a skill by its exact name.

    The comparison is a plain ``==`` against each skill's ``name``, so it is
    case-sensitive. The first matching skill returned by ``list_skills()``
    wins.

    Args:
        name: Skill name (e.g., "EdgarTools")

    Returns:
        BaseSkill instance

    Raises:
        ValueError: If skill not found; the message lists the available
            skill names

    Example:
        >>> from edgar.ai.skills import get_skill
        >>> skill = get_skill("EdgarTools")
        >>> docs = skill.get_documents()
    """
    found = next(
        (candidate for candidate in list_skills() if candidate.name == name),
        None,
    )
    if found is not None:
        return found

    # No match: tell the caller which names would have worked.
    known_names = [entry.name for entry in list_skills()]
    raise ValueError(
        f"Skill '{name}' not found. Available skills: {', '.join(known_names)}"
    )

View File

@@ -0,0 +1,213 @@
"""
Base class for EdgarTools AI skills.
Provides the foundation for creating AI skills that integrate with
edgar.ai infrastructure. External packages can subclass BaseSkill to
create specialized skills (e.g., insider trading detection, fraud analysis).
"""
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Callable
# BaseSkill is the only name this module exports.
__all__ = ['BaseSkill']
class BaseSkill(ABC):
    """
    Abstract base class for EdgarTools AI skills.

    A skill packages:
    - Documentation (markdown files with YAML frontmatter)
    - Helper functions (workflow wrappers)
    - Examples and patterns

    Subclasses must implement the ``name``, ``description`` and
    ``content_dir`` properties and the ``get_helpers()`` method. Everything
    else (document listing/reading, export, string representations) has a
    default implementation built on those four members.

    External packages can subclass this to create specialized skills
    that integrate seamlessly with edgar.ai infrastructure.

    Example:
        >>> from edgar.ai.skills.base import BaseSkill
        >>> from pathlib import Path
        >>>
        >>> class InsiderTradingSkill(BaseSkill):
        ...     @property
        ...     def name(self) -> str:
        ...         return "Insider Trading Detection"
        ...
        ...     @property
        ...     def description(self) -> str:
        ...         return "Analyze Form 4 filings for insider trading patterns"
        ...
        ...     @property
        ...     def content_dir(self) -> Path:
        ...         return Path(__file__).parent / "content"
        ...
        ...     def get_helpers(self) -> Dict[str, Callable]:
        ...         return {
        ...             'detect_unusual_trades': self.detect_unusual_trades,
        ...         }
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """
        Skill name for display and identification.

        Should be descriptive and unique. Example: "SEC Filing Analysis"

        Returns:
            Human-readable skill name
        """
        pass

    @property
    @abstractmethod
    def description(self) -> str:
        """
        Brief description of skill capabilities.

        Used by AI agents to determine when to activate the skill.
        Should clearly describe what problems the skill solves.

        Returns:
            One-sentence skill description
        """
        pass

    @property
    @abstractmethod
    def content_dir(self) -> Path:
        """
        Directory containing skill documentation (markdown files).

        This directory should contain:
        - skill.md: Main skill documentation with YAML frontmatter
        - objects.md: Object reference (optional)
        - workflows.md: Workflow patterns (optional)
        - readme.md: Installation/overview (optional)

        Returns:
            Path to skill content directory
        """
        pass

    @abstractmethod
    def get_helpers(self) -> Dict[str, Callable]:
        """
        Return dictionary of helper functions this skill provides.

        Helper functions are convenience wrappers that simplify
        common workflows for the skill's domain.

        Returns:
            Dict mapping function names to callable objects

        Example:
            >>> {
            ...     'get_revenue_trend': helpers.get_revenue_trend,
            ...     'compare_companies': helpers.compare_companies,
            ... }
        """
        pass

    # Non-abstract methods with default implementations

    def get_object_docs(self) -> List[Path]:
        """
        Return paths to centralized object documentation files to include in exports.

        Override this method to specify which centralized API reference docs
        should be included when exporting the skill. These docs are copied to
        an 'api-reference/' subdirectory in the exported skill package.

        Returns:
            List of Path objects pointing to markdown documentation files
            (empty by default)

        Example:
            >>> def get_object_docs(self) -> List[Path]:
            ...     from pathlib import Path
            ...     root = Path(__file__).parent.parent.parent
            ...     return [
            ...         root / "entity/docs/Company.md",
            ...         root / "xbrl/docs/XBRL.md",
            ...     ]
        """
        return []  # Default: no object docs

    def get_documents(self) -> List[str]:
        """
        List of markdown documents in this skill.

        Only ``*.md`` entries directly inside ``content_dir`` are considered
        (no recursion into subdirectories).

        Returns:
            Alphabetically sorted list of document names (without .md
            extension); empty if ``content_dir`` does not exist
        """
        content_dir = self.content_dir
        if not content_dir.exists():
            return []
        # glob() yields entries in arbitrary filesystem order; sort so the
        # listing (and the "Available: ..." error text) is deterministic.
        return sorted(f.stem for f in content_dir.glob("*.md"))

    def get_document_content(self, name: str) -> str:
        """
        Get content of a specific markdown document.

        Args:
            name: Document name (with or without .md extension)

        Returns:
            Full markdown content as string

        Raises:
            FileNotFoundError: If document doesn't exist; the message lists
                the documents that are available
        """
        doc_name = name if name.endswith('.md') else f"{name}.md"
        doc_path = self.content_dir / doc_name

        if not doc_path.exists():
            available = ", ".join(self.get_documents())
            raise FileNotFoundError(
                f"Document '{name}' not found in skill '{self.name}'. "
                f"Available: {available}"
            )

        # Decode explicitly as UTF-8: the platform default encoding (e.g.
        # cp1252 on Windows) would garble or reject non-ASCII markdown.
        return doc_path.read_text(encoding="utf-8")

    def export(self, format: str = "claude-desktop", output_dir: Optional[Path] = None, **kwargs) -> Path:
        """
        Export skill in specified format.

        Thin wrapper that forwards every argument to
        ``edgar.ai.exporters.export_skill``.

        Args:
            format: Export format (default: "claude-desktop")
                - "claude-desktop": Claude Desktop Skills format (ZIP)
                - "claude-skills": Official Claude Skills format (~/.claude/skills/)
                (the parameter name shadows the ``format`` builtin; it is
                kept for backward compatibility with keyword callers)
            output_dir: Where to create export (default: ./skills_export/)
            **kwargs: Additional format-specific parameters
                - create_zip (bool): For claude-desktop format (default: True)
                - install (bool): For claude-skills format (default: True)

        Returns:
            Path to exported skill directory or archive

        Example:
            >>> skill = EdgarToolsSkill()
            >>> # Export as ZIP for Claude Desktop upload
            >>> path = skill.export(format="claude-desktop")
            >>> # Export to ~/.claude/skills/ for automatic discovery
            >>> path = skill.export(format="claude-skills")
        """
        # Imported at call time rather than at module import time.
        from edgar.ai.exporters import export_skill
        return export_skill(self, format=format, output_dir=output_dir, **kwargs)

    def __repr__(self) -> str:
        """String representation of the skill."""
        return f"{self.__class__.__name__}(name='{self.name}')"

    def __str__(self) -> str:
        """Human-readable skill description with document and helper counts."""
        docs_count = len(self.get_documents())
        helpers_count = len(self.get_helpers())
        return (
            f"Skill: {self.name}\n"
            f"Description: {self.description}\n"
            f"Documents: {docs_count}\n"
            f"Helper Functions: {helpers_count}"
        )

View File

@@ -0,0 +1,119 @@
"""
EdgarTools Skill - Core EdgarTools AI skill.
Provides comprehensive documentation and helper functions for analyzing
SEC filings and financial statements using EdgarTools.
"""
from pathlib import Path
from typing import Dict, Callable
from edgar.ai.skills.base import BaseSkill
# Exported names: the skill class and the shared instance created at the
# bottom of this module.
__all__ = ['EdgarToolsSkill', 'edgartools_skill']
class EdgarToolsSkill(BaseSkill):
    """
    The built-in EdgarTools skill for SEC filing analysis.

    What it bundles:
    - Comprehensive API documentation for SEC filing analysis
    - Helper functions for common workflows
    - Object reference with token estimates
    - Workflow patterns for multi-step analysis

    Topics covered by the documentation:
    - Getting filings (3 approaches: Published, Current, Company-specific)
    - Getting financials (2 approaches: Entity Facts, Filing XBRL)
    - Multi-company analysis
    - Object representations optimized for AI

    Example:
        >>> from pathlib import Path
        >>> from edgar.ai.skills.core import edgartools_skill
        >>>
        >>> # List available documentation: the names of the *.md files
        >>> # that sit next to this module, e.g. 'skill', 'objects',
        >>> # 'workflows', 'readme'
        >>> docs = edgartools_skill.get_documents()
        >>>
        >>> # Get main skill documentation
        >>> guide = edgartools_skill.get_document_content("skill")
        >>>
        >>> # Access helper functions
        >>> helpers = edgartools_skill.get_helpers()
        >>> get_revenue_trend = helpers['get_revenue_trend']
        >>> income = get_revenue_trend("AAPL", periods=3)
        >>>
        >>> # Export skill for Claude Desktop
        >>> path = edgartools_skill.export(
        ...     format="claude-desktop",
        ...     output_dir=Path("~/.config/claude/skills").expanduser()
        ... )
    """

    @property
    def name(self) -> str:
        """Skill name: 'EdgarTools'"""
        return "EdgarTools"

    @property
    def description(self) -> str:
        """Two-sentence summary used by AI agents to decide when to use the skill."""
        what_it_does = "Query and analyze SEC filings and financial statements using EdgarTools. "
        what_you_get = "Get company data, filings, XBRL financials, and perform multi-company analysis."
        return what_it_does + what_you_get

    @property
    def content_dir(self) -> Path:
        """Directory holding the skill's markdown: the folder containing this module."""
        this_file = Path(__file__)
        return this_file.parent

    def get_object_docs(self) -> list[Path]:
        """
        Return centralized object documentation to include in skill exports.

        These are detailed API reference docs that complement the skill's
        tutorial documentation. The paths are built, not checked: nothing
        here verifies that the files exist.

        Returns:
            List of Path objects to centralized markdown documentation files
        """
        # Climb four directory levels from this file. This is meant to land
        # on the edgar/ package root (assumes the module lives under
        # edgar/ai/skills/core/ - TODO confirm if the file is ever moved).
        package_root = Path(__file__)
        for _ in range(4):
            package_root = package_root.parent

        relative_docs = (
            "entity/docs/Company.md",
            "entity/docs/EntityFiling.md",
            "entity/docs/EntityFilings.md",
            "xbrl/docs/XBRL.md",
            "xbrl/docs/Statement.md",
        )
        return [package_root / relative for relative in relative_docs]

    def get_helpers(self) -> Dict[str, Callable]:
        """
        Return helper functions provided by this skill.

        Each entry maps a helper's name to the function of the same name in
        ``edgar.ai.helpers``:
        - get_filings_by_period: Get filings for a specific quarter
        - get_today_filings: Get recent filings (last ~24 hours)
        - get_revenue_trend: Get multi-period income statement
        - get_filing_statement: Get statement from specific filing
        - compare_companies_revenue: Compare revenue across companies

        Returns:
            Dict mapping function names to callable objects
        """
        # Import here to avoid circular dependencies
        from edgar.ai import helpers

        exported = (
            'get_filings_by_period',
            'get_today_filings',
            'get_revenue_trend',
            'get_filing_statement',
            'compare_companies_revenue',
        )
        return {helper_name: getattr(helpers, helper_name) for helper_name in exported}
# Shared, ready-to-use instance created once at import time. The
# edgar.ai.skills package imports it and returns it from list_skills().
# Nothing prevents creating further EdgarToolsSkill() instances.
edgartools_skill = EdgarToolsSkill()