Skip to content

Commit eabdc71

Browse files
Creating audit trail when no data fields found from Schematron Errors (#394)
## Description

Addresses the TODOs in the TTC `lambda_function` by leaving a fuller audit trail when no data fields are found from Schematron errors. Adds a test to cover this case.

I also added a `processed_at` timestamp to the general metadata. The ticket only calls for a timestamp when there is no Schematron error, but it seemed like it could be generally helpful. Happy to change the structure if needed.

## Related Issues

Closes #376

## Additional Notes

[Add any additional context or notes that reviewers should know about.]

<--------------------- REMOVE THE LINES BELOW BEFORE MERGING --------------------->

## Checklist

Please review and complete the following checklist before submitting your pull request:

- [ ] I have ensured that the pull request is of a manageable size, allowing it to be reviewed within a single session.
- [ ] I have reviewed my changes to ensure they are clear, concise, and well-documented.
- [ ] I have updated the documentation, if applicable.
- [ ] I have added or updated test cases to cover my changes, if applicable.
- [ ] I have minimized the number of reviewers to include only those essential for the review.

## Checklist for Reviewers

Please review and complete the following checklist during the review process:

- [ ] The code follows best practices and conventions.
- [ ] The changes implement the desired functionality or fix the reported issue.
- [ ] The tests cover the new changes and pass successfully.
- [ ] Any potential edge cases or error scenarios have been considered.
1 parent ce67bd9 commit eabdc71

File tree

2 files changed

+60
-4
lines changed

2 files changed

+60
-4
lines changed

packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import io
22
import json
33
import os
4+
from datetime import UTC
5+
from datetime import datetime
46

57
from aws_lambda_powertools import Logger
68
from aws_lambda_powertools.utilities.data_classes import SQSEvent
@@ -38,6 +40,11 @@
3840
RETRIEVER = embedder.Embedder()
3941
RERANKER = reranker.Reranker()
4042

43+
# Constants
44+
NO_DATA_FIELDS_MESSAGE = (
45+
"No relevant data fields identified from Schematron errors for TTC processing"
46+
)
47+
4148
# Cache clients and auth to reuse across Lambda invocations
4249
_cached_auth = None
4350
_cached_opensearch_client = None
@@ -137,9 +144,11 @@ def _initialize_ttc_outputs(persistence_id: str) -> tuple[dict, dict]:
137144
"persistence_id": "",
138145
"eicr_metadata": {},
139146
"schematron_errors": {},
147+
"processed_at": "",
140148
}
141149
ttc_output["persistence_id"] = persistence_id
142150
ttc_metadata_output["persistence_id"] = persistence_id
151+
ttc_metadata_output["processed_at"] = datetime.now(UTC).isoformat()
143152
return ttc_output, ttc_metadata_output
144153

145154

@@ -335,11 +344,15 @@ def _process_record_pipeline(
335344
logger.warning(
336345
f"No data fields found from Schematron errors for TTC processing for persistence_id: {persistence_id}"
337346
)
338-
# TODO: update this output to save metadata about the lack of TTC processing due to no relevant data fields being identified to S3 for analysis
339-
ttc_output["message"] = (
340-
"No relevant data fields identified from Schematron errors for TTC processing"
347+
ttc_output["message"] = NO_DATA_FIELDS_MESSAGE
348+
ttc_metadata_output["reason_for_skipping"] = NO_DATA_FIELDS_MESSAGE
349+
logger.info(f"Saving TTC metadata output to S3 for persistence_id {persistence_id}")
350+
ttc_metadata_output_bucket_name = TTC_METADATA_PREFIX.split("/")[0]
351+
lambda_handler.put_file(
352+
file_obj=io.BytesIO(json.dumps(ttc_metadata_output, default=str).encode("utf-8")),
353+
bucket_name=ttc_metadata_output_bucket_name,
354+
object_key=persistence_id,
341355
)
342-
# TODO: Is this enough information to return early?
343356
return ttc_output
344357

345358
original_eicr_content = _load_original_eicr(bucket, persistence_id)

packages/text-to-code-lambda/tests/test_lambda_function.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import json
22

3+
import pytest
4+
35
import lambda_handler
46
from text_to_code_lambda import lambda_function
57

@@ -66,6 +68,7 @@ def test_handler_success(self, example_sqs_event, mock_aws_setup, mock_opensearc
6668
assert ttc_metadata_output["persistence_id"] == mock_aws_setup.persistence_id
6769
assert "eicr_metadata" in ttc_metadata_output
6870
assert "schematron_errors" in ttc_metadata_output
71+
assert "processed_at" in ttc_metadata_output
6972
assert (
7073
len(ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"])
7174
== EXPECTED_RESULTED_ERRORS
@@ -113,3 +116,43 @@ def test_handler_with_empty_body(self, example_sqs_event, caplog_warning, mock_o
113116
"num_success_eicrs": 1,
114117
}
115118
assert mock_opensearch.search.call_count == expected_num_errors
119+
120+
def test_handler_saves_metadata_when_no_relevant_schematron_fields(
121+
self, example_sqs_event, mock_aws_setup, mock_opensearch, mocker
122+
):
123+
"""Test handler saves TTC metadata output when no relevant Schematron fields are found."""
124+
mocker.patch(
125+
"text_to_code_lambda.lambda_function._load_schematron_data_fields", return_value=[]
126+
)
127+
128+
resp = lambda_function.handler(example_sqs_event, {})
129+
assert resp == {
130+
"statusCode": 200,
131+
"message": "TTC processed successfully!",
132+
"num_success_eicrs": 1,
133+
}
134+
135+
# Assert that the TTC output was not saved to S3
136+
with pytest.raises(FileNotFoundError):
137+
lambda_handler.get_file_content_from_s3(
138+
bucket_name=mock_aws_setup.ttc_output_bucket_name,
139+
object_key=mock_aws_setup.persistence_id,
140+
)
141+
142+
# Assert that the TTC metadata output was saved to S3 with the expected content
143+
ttc_metadata_output = json.loads(
144+
lambda_handler.get_file_content_from_s3(
145+
bucket_name=mock_aws_setup.ttc_metadata_bucket_name,
146+
object_key=mock_aws_setup.persistence_id,
147+
)
148+
)
149+
assert ttc_metadata_output is not None
150+
assert ttc_metadata_output["persistence_id"] == mock_aws_setup.persistence_id
151+
assert (
152+
ttc_metadata_output["reason_for_skipping"]
153+
== "No relevant data fields identified from Schematron errors for TTC processing"
154+
)
155+
assert "processed_at" in ttc_metadata_output
156+
assert ttc_metadata_output["eicr_metadata"] == {}
157+
assert ttc_metadata_output["schematron_errors"] == {}
158+
assert mock_opensearch.search.call_count == 0

0 commit comments

Comments
 (0)