Allow visit definition to work with a single exposure in multi-snap

timj · timj · commit a5df38b73c9f · 2023-03-27T15:58:12.000-07:00
This is the true incremental mode where we ingest a new file
and immediately run visit definition.
diff --git a/python/lsst/obs/base/defineVisits.py b/python/lsst/obs/base/defineVisits.py
@@ -214,6 +214,50 @@ def __init__(self, config: GroupExposuresConfig, **kwargs: Any):
         configBaseType=GroupExposuresConfig,
     )
 
+    @abstractmethod
+    def find_missing(
+        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
+    ) -> list[DimensionRecord]:
+        """Determine, if possible, which exposures might be missing.
+
+        Parameters
+        ----------
+        exposures : `list` of `lsst.daf.butler.DimensionRecord`
+            The exposure records to analyze.
+        registry : `lsst.daf.butler.Registry`
+            A butler registry that contains these exposure records.
+
+        Returns
+        -------
+        missing : `list` of `lsst.daf.butler.DimensionRecord`
+            Exposure records present in the registry that are related to
+            the given exposures, were not included in ``exposures``, and
+            are deemed relevant by the grouping scheme.
+
+        Notes
+        -----
+        Only some grouping schemes are able to find missing exposures. It
+        is acceptable to return an empty list.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
+        """Group the exposures in a way most natural for this visit definition.
+
+        Parameters
+        ----------
+        exposures : `list` of `lsst.daf.butler.DimensionRecord`
+            The exposure records to group.
+
+        Returns
+        -------
+        groups : `dict` [`typing.Any`, `list` of `lsst.daf.butler.DimensionRecord`]
+            Groupings of exposure records. The key type is relevant to the
+            specific visit definition and could be a string or a tuple.
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
         """Group the given exposures into visits.
@@ -676,6 +720,14 @@ def run(
                 "visit_system",
                 {"instrument": instrument, "id": visitSystem.value, "name": str(visitSystem)},
             )
+
+        # In true incremental mode we may be given the second snap on its
+        # own, on the assumption that the previous snap was already handled.
+        # For correct grouping we need access to the other exposures in the
+        # visit, so ask the grouping subtask to look them up in the registry.
+        if incremental:
+            exposures.extend(self.groupExposures.find_missing(exposures, self.butler.registry))
+
         # Group exposures into visits, delegating to subtask.
         self.log.info("Grouping %d exposure(s) into visits.", len(exposures))
         definitions = list(self.groupExposures.group(exposures))
@@ -815,6 +867,16 @@ class _GroupExposuresOneToOneTask(GroupExposuresTask, metaclass=ABCMeta):
 
     ConfigClass = _GroupExposuresOneToOneConfig
 
+    def find_missing(
+        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
+    ) -> list[DimensionRecord]:
+        # One exposure per visit in this scheme, so none can be missing.
+        return []
+
+    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
+        # No grouping: each exposure forms its own single-element group.
+        return {exposure.id: [exposure] for exposure in exposures}
+
     def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
         # Docstring inherited from GroupExposuresTask.
         visit_systems = {VisitSystem.from_name("one-to-one")}
@@ -861,12 +923,37 @@ class _GroupExposuresByGroupMetadataTask(GroupExposuresTask, metaclass=ABCMeta):
 
     ConfigClass = _GroupExposuresByGroupMetadataConfig
 
-    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
-        # Docstring inherited from GroupExposuresTask.
-        visit_systems = {VisitSystem.from_name("by-group-metadata")}
+    def find_missing(
+        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
+    ) -> list[DimensionRecord]:
+        missing_exposures: list[DimensionRecord] = []
+        for exposures_in_group in self.group_exposures(exposures).values():
+            # The expected number of exposures in a visit is unknown, so the
+            # registry has to be queried for every group.
+            first = exposures_in_group[0]
+            records = set(
+                registry.queryDimensionRecords(
+                    "exposure",
+                    where="exposure.group_name = group",
+                    bind={"group": first.group_name},
+                    instrument=first.instrument,
+                )
+            )
+            # Keep only the registry records not already supplied.
+            records.difference_update(exposures_in_group)
+            missing_exposures.extend(records)
+        return missing_exposures
+
+    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
         groups = defaultdict(list)
         for exposure in exposures:
             groups[exposure.group_name].append(exposure)
+        return groups
+
+    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
+        # Docstring inherited from GroupExposuresTask.
+        visit_systems = {VisitSystem.from_name("by-group-metadata")}
+        groups = self.group_exposures(exposures)
         for visitName, exposuresInGroup in groups.items():
             instrument = exposuresInGroup[0].instrument
             visitId = exposuresInGroup[0].group_id
@@ -914,14 +1001,43 @@ class _GroupExposuresByCounterAndExposuresTask(GroupExposuresTask, metaclass=ABC
 
     ConfigClass = _GroupExposuresByCounterAndExposuresConfig
 
+    def find_missing(
+        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
+    ) -> list[DimensionRecord]:
+        """Return registry exposures that share ``seq_start``/``seq_end`` with
+        an incomplete group of ``exposures`` but are absent from that group.
+        """
+        missing_exposures: list[DimensionRecord] = []
+        for exposures_in_group in self.group_exposures(exposures).values():
+            sorted_exposures = sorted(exposures_in_group, key=lambda e: e.seq_num)
+            first = sorted_exposures[0]
+            if len(sorted_exposures) < first.seq_end - first.seq_start + 1:
+                # Incomplete group: check registry. NOTE(review): the query has
+                # no day_obs constraint although groups are keyed on it.
+                records = set(
+                    registry.queryDimensionRecords(
+                        "exposure",
+                        where="exposure.seq_start = seq_start AND exposure.seq_end = seq_end",
+                        bind={"seq_start": first.seq_start, "seq_end": first.seq_end},
+                        instrument=first.instrument,
+                    )
+                )
+                records.difference_update(sorted_exposures)
+                missing_exposures.extend(records)
+        return missing_exposures
+
+    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
+        groups = defaultdict(list)
+        for exposure in exposures:
+            groups[exposure.day_obs, exposure.seq_start, exposure.seq_end].append(exposure)
+        return groups
+
     def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
         # Docstring inherited from GroupExposuresTask.
         system_one_to_one = VisitSystem.from_name("one-to-one")
         system_seq_start_end = VisitSystem.from_name("by-seq-start-end")
 
-        groups = defaultdict(list)
-        for exposure in exposures:
-            groups[exposure.day_obs, exposure.seq_start, exposure.seq_end].append(exposure)
+        groups = self.group_exposures(exposures)
         for visit_key, exposures_in_group in groups.items():
             instrument = exposures_in_group[0].instrument
 
diff --git a/tests/test_defineVisits.py b/tests/test_defineVisits.py
@@ -89,6 +89,7 @@ def define_visits(
     ) -> None:
         for records in exposures:
             self.butler.registry.insertDimensionData("exposure", *ensure_iterable(records))
+            # Include all records so far in definition.
             dataIds = [d for d in self.butler.registry.queryDataIds("exposure", instrument="DummyCam")]
             self.task.run(dataIds, incremental=incremental)
 
@@ -97,19 +98,39 @@ def test_defineVisits(self):
         self.define_visits([[r for r in self.records.values()]], incremental=False)  # list inside a list
         self.assertVisits()
 
-    def test_incremental(self):
+    def test_incremental_cumulative(self):
         # Define the visits after each exposure.
         self.define_visits([exp for exp in self.records.values()], incremental=True)
         self.assertVisits()
 
-    def test_incremental_reverse(self):
+    def test_incremental_cumulative_reverse(self):
         # In reverse order we should still eventually end up with the right
         # answer.
         with self.assertLogs("lsst.defineVisits.groupExposures", level="WARNING") as cm:
             self.define_visits(list(reversed(self.records.values())), incremental=True)
         self.assertIn("Skipping the multi-snap definition", "\n".join(cm.output))
         self.assertVisits()
 
+    def define_visits_incrementally(self, exposure: DimensionRecord) -> None:
+        self.butler.registry.insertDimensionData("exposure", exposure)
+        # Only the newly-inserted exposure is handed to the task.
+        dataIds = list(
+            self.butler.registry.queryDataIds(
+                "exposure", instrument="DummyCam", exposure=exposure.id
+            )
+        )
+        self.task.run(dataIds, incremental=True)
+
+    def test_incremental(self):
+        for record in self.records.values():
+            self.define_visits_incrementally(record)
+        self.assertVisits()
+
+    def test_incremental_reverse(self):
+        for record in reversed(self.records.values()):
+            self.define_visits_incrementally(record)
+        self.assertVisits()
+
     def testPickleTask(self):
         stream = pickle.dumps(self.task)
         copy = pickle.loads(stream)