bitsandbytes-foundation · matthewdouglas · Jun 18, 2025 · Jun 18, 2025
diff --git a/tests/test_modules.py b/tests/test_modules.py
@@ -284,7 +284,8 @@ def test_linear_kbit_fp32_bias(device, module):

 @pytest.mark.parametrize("device", get_available_devices())
 @pytest.mark.parametrize("module", module_dict.values(), ids=module_dict.keys())
-def test_kbit_backprop(device, module):
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+def test_kbit_backprop(device, module, dtype):
    b = 16
    dim1 = 36
    dim2 = 84
@@ -298,24 +299,28 @@ def test_kbit_backprop(device, module):

    kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])

-    if device == "hpu" and isinstance(kbit[1], bnb.nn.Linear4bit) and kbit[1].weight.quant_type == "fp4":
-        pytest.skip("FP4 is not supported on HPU")
+    if (
+        device == "hpu"
+        and isinstance(kbit[1], bnb.nn.Linear4bit)
+        and not is_supported_on_hpu(kbit[1].weight.quant_type, dtype)
+    ):
+        pytest.skip("This configuration not supported on HPU")

    kbit[0].weight.detach().copy_(ref[0].weight)
    kbit[1].weight.detach().copy_(ref[1].weight)
    kbit[0].bias.detach().copy_(ref[0].bias)
    kbit[1].bias.detach().copy_(ref[1].bias)
    kbit[1].weight.requires_grad_(False)
-    ref = ref.half().to(device)
-    kbit = kbit.half().to(device)
-    kbit = kbit.half().to(device)
+    ref = ref.to(device=device, dtype=dtype)
+    kbit = kbit.to(device=device, dtype=dtype)
+    kbit = kbit.to(device=device, dtype=dtype)

    errs1 = []
    errs2 = []
    relerrs1 = []
    relerrs2 = []
    for i in range(100):
-        batch = torch.randn(b, dim1, device=device, dtype=torch.float16)
+        batch = torch.randn(b, dim1, device=device, dtype=dtype)
        out1 = ref(batch)
        out2 = kbit(batch)
        out1.mean().backward()