Skip to content

Commit 20f7d7f

Browse files
authored
Merge branch 'dev' into cicd_test_update
2 parents 4af7d58 + 1dc47a5 commit 20f7d7f

22 files changed

+429
-57
lines changed

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ include monai/_version.py
33

44
include README.md
55
include LICENSE
6+
7+
prune tests

monai/losses/image_dissimilarity.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from torch.nn import functional as F
1616
from torch.nn.modules.loss import _Loss
1717

18-
from monai.networks.layers import gaussian_1d, separable_filtering
18+
from monai.networks.layers import separable_filtering
1919
from monai.utils import LossReduction
2020
from monai.utils.module import look_up_option
2121

@@ -34,11 +34,11 @@ def make_triangular_kernel(kernel_size: int) -> torch.Tensor:
3434

3535

3636
def make_gaussian_kernel(kernel_size: int) -> torch.Tensor:
37-
sigma = torch.tensor(kernel_size / 3.0)
38-
kernel = gaussian_1d(sigma=sigma, truncated=kernel_size // 2, approx="sampled", normalize=False) * (
39-
2.5066282 * sigma
40-
)
41-
return kernel[:kernel_size]
37+
sigma = kernel_size / 3.0
38+
half = kernel_size // 2
39+
x = torch.arange(-half, half + 1, dtype=torch.float)
40+
kernel = torch.exp(-0.5 / (sigma * sigma) * x**2)
41+
return kernel
4242

4343

4444
kernel_dict = {

monai/losses/spectral_loss.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ def __init__(
5555
self.fft_norm = fft_norm
5656

5757
def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
58-
input_amplitude = self._get_fft_amplitude(target)
59-
target_amplitude = self._get_fft_amplitude(input)
58+
input_amplitude = self._get_fft_amplitude(input)
59+
target_amplitude = self._get_fft_amplitude(target)
6060

6161
# Compute distance between amplitude of frequency components
6262
# See Section 3.3 from https://arxiv.org/abs/2005.00341

monai/losses/ssim_loss.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,17 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
111111
# 2D data
112112
x = torch.ones([1,1,10,10])/2
113113
y = torch.ones([1,1,10,10])/2
114-
print(1-SSIMLoss(spatial_dims=2)(x,y))
114+
print(SSIMLoss(spatial_dims=2)(x,y))
115115
116116
# pseudo-3D data
117117
x = torch.ones([1,5,10,10])/2 # 5 could represent number of slices
118118
y = torch.ones([1,5,10,10])/2
119-
print(1-SSIMLoss(spatial_dims=2)(x,y))
119+
print(SSIMLoss(spatial_dims=2)(x,y))
120120
121121
# 3D data
122122
x = torch.ones([1,1,10,10,10])/2
123123
y = torch.ones([1,1,10,10,10])/2
124-
print(1-SSIMLoss(spatial_dims=3)(x,y))
124+
print(SSIMLoss(spatial_dims=3)(x,y))
125125
"""
126126
ssim_value = self.ssim_metric._compute_tensor(input, target).view(-1, 1)
127127
loss: torch.Tensor = 1 - ssim_value

monai/networks/layers/filtering.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ def __init__(self, spatial_sigma, color_sigma):
221221
self.len_spatial_sigma = 3
222222
else:
223223
raise ValueError(
224-
f"len(spatial_sigma) {spatial_sigma} must match number of spatial dims {self.ken_spatial_sigma}."
224+
f"Length of `spatial_sigma` must match number of spatial dims (1, 2 or 3) "
225+
f"or be a single float value ({spatial_sigma=})."
225226
)
226227

227228
# Register sigmas as trainable parameters.
@@ -231,6 +232,10 @@ def __init__(self, spatial_sigma, color_sigma):
231232
self.sigma_color = torch.nn.Parameter(torch.tensor(color_sigma))
232233

233234
def forward(self, input_tensor):
235+
if len(input_tensor.shape) < 3:
236+
raise ValueError(
237+
f"Input must have at least 3 dimensions (batch, channel, *spatial_dims), got {len(input_tensor.shape)}"
238+
)
234239
if input_tensor.shape[1] != 1:
235240
raise ValueError(
236241
f"Currently channel dimensions >1 ({input_tensor.shape[1]}) are not supported. "
@@ -239,24 +244,27 @@ def forward(self, input_tensor):
239244
)
240245

241246
len_input = len(input_tensor.shape)
247+
spatial_dims = len_input - 2
242248

243249
# C++ extension so far only supports 5-dim inputs.
244-
if len_input == 3:
250+
if spatial_dims == 1:
245251
input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
246-
elif len_input == 4:
252+
elif spatial_dims == 2:
247253
input_tensor = input_tensor.unsqueeze(4)
248254

249-
if self.len_spatial_sigma != len_input:
250-
raise ValueError(f"Spatial dimension ({len_input}) must match initialized len(spatial_sigma).")
255+
if self.len_spatial_sigma != spatial_dims:
256+
raise ValueError(
257+
f"Number of spatial dimensions ({spatial_dims}) must match initialized `len(spatial_sigma)`."
258+
)
251259

252260
prediction = TrainableBilateralFilterFunction.apply(
253261
input_tensor, self.sigma_x, self.sigma_y, self.sigma_z, self.sigma_color
254262
)
255263

256264
# Make sure to return tensor of the same shape as the input.
257-
if len_input == 3:
265+
if spatial_dims == 1:
258266
prediction = prediction.squeeze(4).squeeze(3)
259-
elif len_input == 4:
267+
elif spatial_dims == 2:
260268
prediction = prediction.squeeze(4)
261269

262270
return prediction
@@ -389,7 +397,8 @@ def __init__(self, spatial_sigma, color_sigma):
389397
self.len_spatial_sigma = 3
390398
else:
391399
raise ValueError(
392-
f"len(spatial_sigma) {spatial_sigma} must match number of spatial dims {self.ken_spatial_sigma}."
400+
f"Length of `spatial_sigma` must match number of spatial dims (1, 2 or 3)\n"
401+
f"or be a single float value ({spatial_sigma=})."
393402
)
394403

395404
# Register sigmas as trainable parameters.
@@ -399,39 +408,45 @@ def __init__(self, spatial_sigma, color_sigma):
399408
self.sigma_color = torch.nn.Parameter(torch.tensor(color_sigma))
400409

401410
def forward(self, input_tensor, guidance_tensor):
411+
if len(input_tensor.shape) < 3:
412+
raise ValueError(
413+
f"Input must have at least 3 dimensions (batch, channel, *spatial_dims), got {len(input_tensor.shape)}"
414+
)
402415
if input_tensor.shape[1] != 1:
403416
raise ValueError(
404-
f"Currently channel dimensions >1 ({input_tensor.shape[1]}) are not supported. "
417+
f"Currently channel dimensions > 1 ({input_tensor.shape[1]}) are not supported. "
405418
"Please use multiple parallel filter layers if you want "
406419
"to filter multiple channels."
407420
)
408421
if input_tensor.shape != guidance_tensor.shape:
409422
raise ValueError(
410-
"Shape of input image must equal shape of guidance image."
411-
f"Got {input_tensor.shape} and {guidance_tensor.shape}."
423+
f"Shape of input image must equal shape of guidance image, got {input_tensor.shape} and {guidance_tensor.shape}."
412424
)
413425

414426
len_input = len(input_tensor.shape)
427+
spatial_dims = len_input - 2
415428

416429
# C++ extension so far only supports 5-dim inputs.
417-
if len_input == 3:
430+
if spatial_dims == 1:
418431
input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
419432
guidance_tensor = guidance_tensor.unsqueeze(3).unsqueeze(4)
420-
elif len_input == 4:
433+
elif spatial_dims == 2:
421434
input_tensor = input_tensor.unsqueeze(4)
422435
guidance_tensor = guidance_tensor.unsqueeze(4)
423436

424-
if self.len_spatial_sigma != len_input:
425-
raise ValueError(f"Spatial dimension ({len_input}) must match initialized len(spatial_sigma).")
437+
if self.len_spatial_sigma != spatial_dims:
438+
raise ValueError(
439+
f"Number of spatial dimensions ({spatial_dims}) must match initialized `len(spatial_sigma)`."
440+
)
426441

427442
prediction = TrainableJointBilateralFilterFunction.apply(
428443
input_tensor, guidance_tensor, self.sigma_x, self.sigma_y, self.sigma_z, self.sigma_color
429444
)
430445

431446
# Make sure to return tensor of the same shape as the input.
432-
if len_input == 3:
447+
if spatial_dims == 1:
433448
prediction = prediction.squeeze(4).squeeze(3)
434-
elif len_input == 4:
449+
elif spatial_dims == 2:
435450
prediction = prediction.squeeze(4)
436451

437452
return prediction

monai/networks/nets/autoencoderkl.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,7 @@ def load_old_state_dict(self, old_state_dict: dict, verbose=False) -> None:
680680
681681
Args:
682682
old_state_dict: state dict from the old AutoencoderKL model.
683+
verbose: if True, print diagnostic information about key mismatches.
683684
"""
684685

685686
new_state_dict = self.state_dict()
@@ -715,13 +716,39 @@ def load_old_state_dict(self, old_state_dict: dict, verbose=False) -> None:
715716
new_state_dict[f"{block}.attn.to_k.bias"] = old_state_dict.pop(f"{block}.to_k.bias")
716717
new_state_dict[f"{block}.attn.to_v.bias"] = old_state_dict.pop(f"{block}.to_v.bias")
717718

718-
# old version did not have a projection so set these to the identity
719-
new_state_dict[f"{block}.attn.out_proj.weight"] = torch.eye(
720-
new_state_dict[f"{block}.attn.out_proj.weight"].shape[0]
721-
)
722-
new_state_dict[f"{block}.attn.out_proj.bias"] = torch.zeros(
723-
new_state_dict[f"{block}.attn.out_proj.bias"].shape
724-
)
719+
out_w = f"{block}.attn.out_proj.weight"
720+
out_b = f"{block}.attn.out_proj.bias"
721+
proj_w = f"{block}.proj_attn.weight"
722+
proj_b = f"{block}.proj_attn.bias"
723+
724+
if out_w in new_state_dict:
725+
if proj_w in old_state_dict:
726+
new_state_dict[out_w] = old_state_dict.pop(proj_w)
727+
if proj_b in old_state_dict:
728+
new_state_dict[out_b] = old_state_dict.pop(proj_b)
729+
else:
730+
new_state_dict[out_b] = torch.zeros(
731+
new_state_dict[out_b].shape,
732+
dtype=new_state_dict[out_b].dtype,
733+
device=new_state_dict[out_b].device,
734+
)
735+
else:
736+
# No legacy proj_attn - initialize out_proj to identity/zero
737+
new_state_dict[out_w] = torch.eye(
738+
new_state_dict[out_w].shape[0],
739+
dtype=new_state_dict[out_w].dtype,
740+
device=new_state_dict[out_w].device,
741+
)
742+
new_state_dict[out_b] = torch.zeros(
743+
new_state_dict[out_b].shape,
744+
dtype=new_state_dict[out_b].dtype,
745+
device=new_state_dict[out_b].device,
746+
)
747+
elif proj_w in old_state_dict:
748+
# new model has no out_proj at all - discard the legacy keys so they
749+
# don't surface as "unexpected keys" during load_state_dict
750+
old_state_dict.pop(proj_w)
751+
old_state_dict.pop(proj_b, None)
725752

726753
# fix the upsample conv blocks which were renamed postconv
727754
for k in new_state_dict:

monai/networks/nets/segresnet_ds.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,20 @@ class SegResNetDS(nn.Module):
254254
image spacing into an approximately isotropic space.
255255
Otherwise, by default, the kernel size and downsampling is always isotropic.
256256
257+
**Spatial shape constraints**: If ``resolution`` is ``None`` (isotropic mode),
258+
each spatial dimension must be divisible by ``2 ** (len(blocks_down) - 1)``.
259+
With the default ``blocks_down=(1, 2, 2, 4)``, each dimension must be
260+
divisible by 8. If ``resolution`` is provided (anisotropic mode),
261+
divisibility can differ per dimension; use :py:meth:`shape_factor` for
262+
the exact required factors and :py:meth:`is_valid_shape` to verify a shape.
263+
264+
Example::
265+
266+
model = SegResNetDS(spatial_dims=3, blocks_down=(1, 2, 2, 4))
267+
print(model.shape_factor()) # [8, 8, 8]
268+
print(model.is_valid_shape((1, 1, 128, 128, 128))) # True
269+
print(model.is_valid_shape((1, 1, 100, 100, 100))) # False
270+
257271
"""
258272

259273
def __init__(

monai/networks/nets/swin_unetr.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,19 @@ class SwinUNETR(nn.Module):
4747
Swin UNETR based on: "Hatamizadeh et al.,
4848
Swin UNETR: Swin Transformers for Semantic Segmentation of Brain Tumors in MRI Images
4949
<https://arxiv.org/abs/2201.01266>"
50+
51+
Spatial Shape Constraints:
52+
Each spatial dimension of the input must be divisible by ``patch_size ** 5``.
53+
With the default ``patch_size=2``, this means each spatial dimension must be divisible by **32**
54+
(i.e., 2^5 = 32). This requirement comes from the patch embedding step followed by 4 stages
55+
of PatchMerging downsampling, each halving the spatial resolution.
56+
57+
For a custom ``patch_size``, the divisibility requirement is ``patch_size ** 5``.
58+
59+
Examples of valid 3D input sizes (with default ``patch_size=2``):
60+
``(32, 32, 32)``, ``(64, 64, 64)``, ``(96, 96, 96)``, ``(128, 128, 128)``, ``(64, 32, 192)``.
61+
62+
A ``ValueError`` is raised in ``forward()`` if the input spatial shape violates this constraint.
5063
"""
5164

5265
def __init__(
@@ -76,7 +89,8 @@ def __init__(
7689
Args:
7790
in_channels: dimension of input channels.
7891
out_channels: dimension of output channels.
79-
patch_size: size of the patch token.
92+
patch_size: size of the patch token. Input spatial dimensions must be divisible by
93+
``patch_size ** 5`` (e.g., divisible by 32 when ``patch_size=2``).
8094
feature_size: dimension of network feature size.
8195
depths: number of layers in each stage.
8296
num_heads: number of attention heads.
@@ -108,6 +122,10 @@ def __init__(
108122
# for 2D single channel input with size (96,96), 2-channel output and gradient checkpointing.
109123
>>> net = SwinUNETR(in_channels=3, out_channels=2, use_checkpoint=True, spatial_dims=2)
110124
125+
Raises:
126+
ValueError: When a spatial dimension of the input is not divisible by ``patch_size ** 5``.
127+
Use ``net._check_input_size(spatial_shape)`` to validate a shape before inference.
128+
111129
"""
112130

113131
super().__init__()

monai/transforms/signal/array.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ def __call__(self, signal: NdarrayOrTensor) -> NdarrayOrTensor:
273273
data = convert_to_tensor(self.freqs * time_partial)
274274
sine_partial = self.magnitude * torch.sin(data)
275275

276-
loc = np.random.choice(range(length))
276+
loc = self.R.choice(range(length))
277277
signal = paste(signal, sine_partial, (loc,))
278278

279279
return signal
@@ -354,7 +354,7 @@ def __call__(self, signal: NdarrayOrTensor) -> NdarrayOrTensor:
354354
time_partial = np.arange(0, round(self.fracs * length), 1)
355355
squaredpulse_partial = self.magnitude * squarepulse(self.freqs * time_partial)
356356

357-
loc = np.random.choice(range(length))
357+
loc = self.R.choice(range(length))
358358
signal = paste(signal, squaredpulse_partial, (loc,))
359359

360360
return signal

monai/transforms/utility/array.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,19 +1049,34 @@ class ConvertToMultiChannelBasedOnBratsClasses(Transform):
10491049
which include TC (Tumor core), WT (Whole tumor) and ET (Enhancing tumor):
10501050
label 1 is the necrotic and non-enhancing tumor core, which should be counted under TC and WT subregion,
10511051
label 2 is the peritumoral edema, which is counted only under WT subregion,
1052-
label 4 is the GD-enhancing tumor, which should be counted under ET, TC, WT subregions.
1052+
the specified `et_label` (default 4) is the GD-enhancing tumor, which should be counted under ET, TC, WT subregions.
1053+
1054+
Args:
1055+
et_label: the label used for the GD-enhancing tumor (ET).
1056+
- Use 4 for BraTS 2018-2022.
1057+
- Use 3 for BraTS 2023.
1058+
Defaults to 4.
10531059
"""
10541060

10551061
backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
10561062

1063+
def __init__(self, et_label: int = 4) -> None:
1064+
if et_label in (1, 2):
1065+
raise ValueError(f"et_label cannot be 1 or 2, as these are reserved. Got {et_label}.")
1066+
self.et_label = et_label
1067+
10571068
def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
10581069
# if img has channel dim, squeeze it
10591070
if img.ndim == 4 and img.shape[0] == 1:
10601071
img = img.squeeze(0)
10611072

1062-
result = [(img == 1) | (img == 4), (img == 1) | (img == 4) | (img == 2), img == 4]
1063-
# merge labels 1 (tumor non-enh) and 4 (tumor enh) and 2 (large edema) to WT
1064-
# label 4 is ET
1073+
result = [
1074+
(img == 1) | (img == self.et_label),
1075+
(img == 1) | (img == self.et_label) | (img == 2),
1076+
img == self.et_label,
1077+
]
1078+
# merge labels 1 (tumor non-enh) and self.et_label (tumor enh) and 2 (large edema) to WT
1079+
# self.et_label is ET (4 or 3)
10651080
return torch.stack(result, dim=0) if isinstance(img, torch.Tensor) else np.stack(result, axis=0)
10661081

10671082

0 commit comments

Comments
 (0)