Creating audit trail when no data fields found from Schematron Errors (#394)

robertandremitchell · web-flow · commit eabdc711613a · 2026-03-30T09:16:13.000-04:00
## Description Addresses the TODOs in the TTC `lambda_function` so that a fuller audit trail is left when no data fields are found from Schematron errors. Adds a test to cover this case. I also added a `processed_at` timestamp to the general metadata. The ticket only calls out having a timestamp in the no-Schematron-error case, but it seemed like it could be generally helpful. Happy to change the structure if needed. ## Related Issues Closes #376 ## Additional Notes [Add any additional context or notes that reviewers should know about.] <--------------------- REMOVE THE LINES BELOW BEFORE MERGING ---------------------> ## Checklist Please review and complete the following checklist before submitting your pull request: - [ ] I have ensured that the pull request is of a manageable size, allowing it to be reviewed within a single session. - [ ] I have reviewed my changes to ensure they are clear, concise, and well-documented. - [ ] I have updated the documentation, if applicable. - [ ] I have added or updated test cases to cover my changes, if applicable. - [ ] I have minimized the number of reviewers to include only those essential for the review. ## Checklist for Reviewers Please review and complete the following checklist during the review process: - [ ] The code follows best practices and conventions. - [ ] The changes implement the desired functionality or fix the reported issue. - [ ] The tests cover the new changes and pass successfully. - [ ] Any potential edge cases or error scenarios have been considered.
diff --git a/packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py b/packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py
@@ -1,6 +1,8 @@
 import io
 import json
 import os
+from datetime import UTC
+from datetime import datetime
 
 from aws_lambda_powertools import Logger
 from aws_lambda_powertools.utilities.data_classes import SQSEvent
@@ -38,6 +40,11 @@
 RETRIEVER = embedder.Embedder()
 RERANKER = reranker.Reranker()
 
+# Constants
+NO_DATA_FIELDS_MESSAGE = (
+    "No relevant data fields identified from Schematron errors for TTC processing"
+)
+
 # Cache clients and auth to reuse across Lambda invocations
 _cached_auth = None
 _cached_opensearch_client = None
@@ -137,9 +144,11 @@ def _initialize_ttc_outputs(persistence_id: str) -> tuple[dict, dict]:
         "persistence_id": "",
         "eicr_metadata": {},
         "schematron_errors": {},
+        "processed_at": "",
     }
     ttc_output["persistence_id"] = persistence_id
     ttc_metadata_output["persistence_id"] = persistence_id
+    ttc_metadata_output["processed_at"] = datetime.now(UTC).isoformat()
     return ttc_output, ttc_metadata_output
 
 
@@ -335,11 +344,15 @@ def _process_record_pipeline(
         logger.warning(
             f"No data fields found from Schematron errors for TTC processing for persistence_id: {persistence_id}"
         )
-        # TODO: update this output to save metadata about the lack of TTC processing due to no relevant data fields being identified to S3 for analysis
-        ttc_output["message"] = (
-            "No relevant data fields identified from Schematron errors for TTC processing"
+        ttc_output["message"] = NO_DATA_FIELDS_MESSAGE
+        ttc_metadata_output["reason_for_skipping"] = NO_DATA_FIELDS_MESSAGE
+        logger.info(f"Saving TTC metadata output to S3 for persistence_id {persistence_id}")
+        ttc_metadata_output_bucket_name = TTC_METADATA_PREFIX.split("/")[0]
+        lambda_handler.put_file(
+            file_obj=io.BytesIO(json.dumps(ttc_metadata_output, default=str).encode("utf-8")),
+            bucket_name=ttc_metadata_output_bucket_name,
+            object_key=persistence_id,
         )
-        # TODO: Is this enough information to return early?
         return ttc_output
 
     original_eicr_content = _load_original_eicr(bucket, persistence_id)
diff --git a/packages/text-to-code-lambda/tests/test_lambda_function.py b/packages/text-to-code-lambda/tests/test_lambda_function.py
@@ -1,5 +1,7 @@
 import json
 
+import pytest
+
 import lambda_handler
 from text_to_code_lambda import lambda_function
 
@@ -66,6 +68,7 @@ def test_handler_success(self, example_sqs_event, mock_aws_setup, mock_opensearc
         assert ttc_metadata_output["persistence_id"] == mock_aws_setup.persistence_id
         assert "eicr_metadata" in ttc_metadata_output
         assert "schematron_errors" in ttc_metadata_output
+        assert "processed_at" in ttc_metadata_output
         assert (
             len(ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"])
             == EXPECTED_RESULTED_ERRORS
@@ -113,3 +116,43 @@ def test_handler_with_empty_body(self, example_sqs_event, caplog_warning, mock_o
             "num_success_eicrs": 1,
         }
         assert mock_opensearch.search.call_count == expected_num_errors
+
+    def test_handler_saves_metadata_when_no_relevant_schematron_fields(
+        self, example_sqs_event, mock_aws_setup, mock_opensearch, mocker
+    ):
+        """Test handler saves TTC metadata output when no relevant Schematron fields are found."""
+        mocker.patch(
+            "text_to_code_lambda.lambda_function._load_schematron_data_fields", return_value=[]
+        )
+
+        resp = lambda_function.handler(example_sqs_event, {})
+        assert resp == {
+            "statusCode": 200,
+            "message": "TTC processed successfully!",
+            "num_success_eicrs": 1,
+        }
+
+        # Assert that the TTC output was not saved to S3
+        with pytest.raises(FileNotFoundError):
+            lambda_handler.get_file_content_from_s3(
+                bucket_name=mock_aws_setup.ttc_output_bucket_name,
+                object_key=mock_aws_setup.persistence_id,
+            )
+
+        # Assert that the TTC metadata output was saved to S3 with the expected content
+        ttc_metadata_output = json.loads(
+            lambda_handler.get_file_content_from_s3(
+                bucket_name=mock_aws_setup.ttc_metadata_bucket_name,
+                object_key=mock_aws_setup.persistence_id,
+            )
+        )
+        assert ttc_metadata_output is not None
+        assert ttc_metadata_output["persistence_id"] == mock_aws_setup.persistence_id
+        assert (
+            ttc_metadata_output["reason_for_skipping"]
+            == "No relevant data fields identified from Schematron errors for TTC processing"
+        )
+        assert "processed_at" in ttc_metadata_output
+        assert ttc_metadata_output["eicr_metadata"] == {}
+        assert ttc_metadata_output["schematron_errors"] == {}
+        assert mock_opensearch.search.call_count == 0