Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 35 additions & 14 deletions edgar/attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,20 +1025,41 @@ def get_filing_dates(self)-> Optional[Tuple[str,str, Optional[str]]]:
grouping_divs = self._soup.find_all("div", class_="formGrouping")
if len(grouping_divs) == 0:
return None
date_grouping_div = grouping_divs[0]
info_divs = date_grouping_div.find_all("div", class_="info")
filing_date = info_divs[0].text.strip()
accepted_date = info_divs[1].text.strip()

if len(grouping_divs) > 1:
period_grouping_div = grouping_divs[1]
first_info_div = period_grouping_div.find("div", class_="info")
if first_info_div:
period = first_info_div.text.strip()
result = filing_date, accepted_date, period
self._cached_filing_dates = result
return result
result = filing_date, accepted_date, None

# Build a label -> value map by pairing each <div class="infoHead">
# with the next sibling <div class="info"> under the same grouping.
# This is label-based rather than positional: on multi-filer Schedule
# 13D/G filings the Filer(s) block can appear at index 1, and a
# positional lookup would return its concatenated filer names as the
# period of report, which then crashes downstream isoformat parsers.
label_to_value: Dict[str, str] = {}
for grouping in grouping_divs:
children = [c for c in grouping.find_all("div", recursive=False)
if c.get("class")]
current_label: Optional[str] = None
for child in children:
classes = child.get("class") or []
if "infoHead" in classes:
current_label = child.text.strip().lower()
elif "info" in classes and current_label is not None:
# Only keep the first value for each label.
label_to_value.setdefault(current_label, child.text.strip())
current_label = None

filing_date = label_to_value.get("filing date")
accepted_date = label_to_value.get("accepted")
period = label_to_value.get("period of report")

# Fall back to the legacy positional layout if the label-based lookup
# missed either of the always-present date fields.
if filing_date is None or accepted_date is None:
info_divs = grouping_divs[0].find_all("div", class_="info")
if filing_date is None and len(info_divs) >= 1:
filing_date = info_divs[0].text.strip()
if accepted_date is None and len(info_divs) >= 2:
accepted_date = info_divs[1].text.strip()

result = filing_date, accepted_date, period
self._cached_filing_dates = result
return result

Expand Down
115 changes: 115 additions & 0 deletions tests/issues/regression/test_issue_sc13g_period_of_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
Regression tests for the Schedule 13D/G multi-filer crash.

FilingHomepage.get_filing_dates used to pick the "Period of Report" block
by position (grouping_divs[1]). On multi-filer Schedule 13D/G filings, the
Filer(s) block can appear at index 1 and its .text.strip() concatenates
each filer's name with no delimiter. Downstream callers (e.g.
sec-edgar-mcp) then blow up with "Invalid isoformat string: ...". The fix
is to pick the block by matching its <div class="infoHead"> label instead
of by position.
"""

import pytest
from bs4 import BeautifulSoup

from edgar.attachments import Attachments, FilingHomepage

# Module-level pytest mark: tags every test in this file with the "fast" marker.
pytestmark = pytest.mark.fast


def _homepage_from_html(html: str) -> FilingHomepage:
    """Wrap raw *html* in a FilingHomepage parsed with ``html.parser``.

    The homepage is given a fixed placeholder URL and an empty
    ``Attachments`` container.
    """
    parsed_page = BeautifulSoup(html, "html.parser")
    # These tests never look at attachments, so an empty container suffices.
    no_attachments = Attachments(
        document_files=[],
        data_files=[],
        primary_documents=[],
    )
    return FilingHomepage(
        url="http://example/index.html",
        soup=parsed_page,
        attachments=no_attachments,
    )


# Shared fixture markup: a single "formGrouping" block carrying the
# "Filing Date" and "Accepted" label/value pairs. Each test interpolates it
# at the top of its page body.
_FILING_AND_ACCEPTED = """
<div class="formGrouping">
<div class="infoHead">Filing Date</div>
<div class="info">2024-05-01</div>
<div class="infoHead">Accepted</div>
<div class="info">2024-05-01 16:30:00</div>
</div>
"""


def test_multi_filer_does_not_return_concatenated_filer_names():
    """A Filer(s) grouping in second position must not be reported as the period."""
    page_markup = f"""
<html><body>
{_FILING_AND_ACCEPTED}
<div class="formGrouping">
<div class="infoHead">Filer(s)</div>
<div class="info">FILER ONE</div>
<div class="info">FILER TWO, LLC</div>
<div class="info">FILER THREE</div>
<div class="info">FILER FOUR, INC.</div>
</div>
</body></html>
"""
    dates = _homepage_from_html(page_markup).get_filing_dates()
    filed_on, accepted_at, period = dates

    # The two always-present dates still come from the first grouping.
    assert filed_on == "2024-05-01"
    assert accepted_at == "2024-05-01 16:30:00"
    # No "Period of Report" label exists, so nothing may leak in from Filer(s).
    assert period is None, f"Expected None, got {period!r}"


def test_period_block_found_by_label_when_at_index_1():
    """Classic layout: the second grouping is the Period of Report block."""
    page_markup = f"""
<html><body>
{_FILING_AND_ACCEPTED}
<div class="formGrouping">
<div class="infoHead">Period of Report</div>
<div class="info">2024-03-31</div>
</div>
</body></html>
"""
    page = _homepage_from_html(page_markup)
    filed_on, accepted_at, report_period = page.get_filing_dates()

    assert (filed_on, accepted_at) == ("2024-05-01", "2024-05-01 16:30:00")
    assert report_period == "2024-03-31"


def test_period_block_found_even_when_not_at_index_1():
    """The period is located by its label even when Filer(s) precedes it."""
    page_markup = f"""
<html><body>
{_FILING_AND_ACCEPTED}
<div class="formGrouping">
<div class="infoHead">Filer(s)</div>
<div class="info">FILER ONE</div>
<div class="info">FILER TWO, LLC</div>
</div>
<div class="formGrouping">
<div class="infoHead">Period of Report</div>
<div class="info">2024-03-31</div>
</div>
</body></html>
"""
    page = _homepage_from_html(page_markup)
    filed_on, accepted_at, report_period = page.get_filing_dates()

    assert filed_on == "2024-05-01"
    assert accepted_at == "2024-05-01 16:30:00"
    # Period of Report sits in the third grouping here, not the second.
    assert report_period == "2024-03-31"


def test_no_period_block_returns_none_for_period():
    """With only the filing/accepted grouping present, the period is None."""
    page_markup = f"""
<html><body>
{_FILING_AND_ACCEPTED}
</body></html>
"""
    dates = _homepage_from_html(page_markup).get_filing_dates()
    filed_on, accepted_at, report_period = dates

    assert (filed_on, accepted_at) == ("2024-05-01", "2024-05-01 16:30:00")
    assert report_period is None