Skip to content
50 changes: 33 additions & 17 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1576,21 +1576,36 @@ def add_metadata(self, infos: dict[str, Any]) -> None:
self._info = DictionaryObject()
self._info.update(args)

_UNSET = object()

def compress_identical_objects(
self,
remove_identicals_old: Any = _UNSET,
remove_orphans: Any = _UNSET,
*,
remove_identicals: bool = True,
remove_orphans: bool = True,
remove_unreferenced: bool = True,
) -> None:
"""
Parse the PDF file and merge objects that have the same hash.
Parse the PDF file and merge objects that have the same hash.
This will make objects common to multiple pages.
Recommended to be used just before writing output.

Args:
remove_identicals_old: Positional argument, kept while the old positional calling convention for remove_identicals is being deprecated.
remove_orphans: Remove unreferenced objects; deprecated — use remove_unreferenced instead.
remove_identicals: Remove identical objects.
remove_orphans: Remove unreferenced objects.
remove_unreferenced: Remove unreferenced objects.

"""
if remove_identicals_old != self._UNSET:
# Deprecate indicating keyword-only is supported.
assert isinstance(remove_identicals_old, bool) # Check type!
remove_identicals = remove_identicals_old
if remove_orphans != self._UNSET:
# Deprecate with new name and keyword-only.
assert isinstance(remove_orphans, bool) # Check type!
remove_unreferenced = remove_orphans

def replace_in_obj(
obj: PdfObject, crossref: dict[IndirectObject, IndirectObject]
Expand All @@ -1604,17 +1619,17 @@ def replace_in_obj(
assert isinstance(obj, (DictionaryObject, ArrayObject))
for k, v in key_val:
if isinstance(v, IndirectObject):
orphans[v.idnum - 1] = False
unreferenced[v.idnum - 1] = False
if v in crossref:
obj[k] = crossref[v]
else:
"""the filtering on DictionaryObject and ArrayObject only
"""The filtering on DictionaryObject and ArrayObject only
will be performed within replace_in_obj"""
replace_in_obj(v, crossref)

# _idnum_hash :dict[hash]=(1st_ind_obj,[other_indir_objs,...])
# _idnum_hash: dict[hash] = (1st_ind_obj, [2nd_ind_obj,...])
self._idnum_hash = {}
orphans = [True] * len(self._objects)
unreferenced = [True] * len(self._objects)
# look for similar objects
for idx, obj in enumerate(self._objects):
if is_null_or_none(obj):
Expand All @@ -1639,18 +1654,19 @@ def replace_in_obj(
if isinstance(obj, (DictionaryObject, ArrayObject)):
replace_in_obj(obj, cnv_rev)

# remove orphans (if applicable)
orphans[self.root_object.indirect_reference.idnum - 1] = False # type: ignore
if remove_unreferenced:
unreferenced[self.root_object.indirect_reference.idnum - 1] = False # type: ignore

if not is_null_or_none(self._info):
orphans[self._info.indirect_reference.idnum - 1] = False # type: ignore
if not is_null_or_none(self._info):
unreferenced[self._info.indirect_reference.idnum - 1] = False # type: ignore

try:
orphans[self._ID.indirect_reference.idnum - 1] = False # type: ignore
except AttributeError:
pass
for i in compress(range(len(self._objects)), orphans):
self._objects[i] = None
try:
unreferenced[self._ID.indirect_reference.idnum - 1] = False # type: ignore
except AttributeError:
pass

for i in compress(range(len(self._objects)), unreferenced):
self._objects[i] = None

def get_reference(self, obj: PdfObject) -> IndirectObject:
idnum = self._objects.index(obj) + 1
Expand Down
1 change: 1 addition & 0 deletions tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def test_transform_compress_identical_objects():
for page in writer.pages:
op = Transformation().scale(sx=0.8, sy=0.8)
page.add_transformation(op)
writer.add_page(page)
writer.compress_identical_objects()
bytes_out = BytesIO()
writer.write(bytes_out)
Expand Down