Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 43f78bf

Browse files
hvaara authored
and pytorchmergebot committed
[MPS] Gather sliced inputs to batch norm (#133610)
This PR removes the `executeGatherOp` flag from batch norm in favor of relying on the logic in https://github.com/pytorch/pytorch/blob/4aa66f68a803927ddd127ceaaa1521b8d6e90e5f/aten/src/ATen/native/mps/OperationUtils.mm#L372 to decide if gathering is necessary. It's not the most efficient way to solve this issue, but it assures correctness for sliced inputs. ### Performance impact #### With fix ``` python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)" 100 loops, best of 5: 282 usec per loop python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])" 100 loops, best of 5: 448 usec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)" 1000 loops, best of 5: 705 usec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])" 1000 loops, best of 5: 1.11 msec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x)" 1000 loops, best of 5: 7.16 msec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x[5:])" 1000 loops, best of 5: 11.7 msec per loop ``` #### Without fix ``` python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)" 100 loops, best of 5: 284 usec per loop python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = 
nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])" 100 loops, best of 5: 265 usec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)" 1000 loops, best of 5: 715 usec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])" 1000 loops, best of 5: 675 usec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x)" 1000 loops, best of 5: 7.19 msec per loop python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x[5:])" 1000 loops, best of 5: 7.13 msec per loop ``` Please feel free to push back or request changes. Fixes #133520 Pull Request resolved: #133610 Approved by: https://github.com/malfet
1 parent 278bc98 commit 43f78bf
Copy full SHA for 43f78bf

File tree

Expand file tree / Collapse file tree

2 files changed

+14
-7
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+14
-7
lines changed

‎aten/src/ATen/native/mps/operations/Normalization.mm

Copy file name to clipboardExpand all lines: aten/src/ATen/native/mps/operations/Normalization.mm
+1 −7 — Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,6 @@ static void get_shapes(MPSShape* input_shape_readonly,
153153
else
154154
channelsDim = num_input_dims - 1;
155155

156-
bool executeGatherOp = true;
157-
if (self.is_contiguous(memory_format)) {
158-
memory_format = MemoryFormat::Contiguous;
159-
executeGatherOp = false;
160-
}
161-
162156
auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
163157
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, input_mps_dtype, input_shape);
164158
MPSGraphTensor* weightTensor = nil;
@@ -302,7 +296,7 @@ Check if running mean exists (maybe do this check before making graph)
302296
newCachedGraph->runningVarInplaceUpdate_ = runningVarInplaceUpdate;
303297
});
304298

305-
auto inputPlaceholder = Placeholder(cachedGraph->inputTensor_, self, input_shape, executeGatherOp);
299+
auto inputPlaceholder = Placeholder(cachedGraph->inputTensor_, self, input_shape);
306300
auto weightPlaceholder = Placeholder();
307301
if (has_weight)
308302
weightPlaceholder = Placeholder(cachedGraph->weightTensor_, weight_opt.value(), new_mean_shape);

‎test/test_mps.py

Copy file name to clipboardExpand all lines: test/test_mps.py
+13 −0 — Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2541,6 +2541,19 @@ def test_batch_norm_backward(self):
25412541
# This used to crash, see https://github.com/pytorch/pytorch/issues/98602
25422542
outputs.sum().backward()
25432543

2544+
# Regression test for https://github.com/pytorch/pytorch/issues/133520
2545+
def test_batch_norm_slices(self):
2546+
bn_cpu = nn.BatchNorm2d(100, affine=False, device='cpu')
2547+
bn_mps = nn.BatchNorm2d(100, affine=False, device='mps')
2548+
2549+
x_cpu = torch.randn(100, 100, 35, 45).to('cpu')
2550+
x_mps = x_cpu.to('mps')
2551+
2552+
res_cpu = bn_cpu(x_cpu[5:])
2553+
res_mps = bn_mps(x_mps[5:])
2554+
2555+
self.assertEqual(res_cpu, res_mps)
2556+
25442557
def test_layer_norm_backward(self):
25452558
inputs = torch.rand(4, 4, device="mps", requires_grad=True)
25462559
x = torch.nn.LayerNorm(4).to("mps")

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.