Skip to content

Commit 76ad054

Browse files
committed
Add GPTQ support for block quantization
1 parent 464d000 commit 76ad054

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

src/llmcompressor/modifiers/gptq/gptq_quantize.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,16 @@ def quantize_weight(
226226
altered_qargs,
227227
global_scale=global_scale,
228228
)
229+
elif strategy == QuantizationStrategy.BLOCK:
230+
block_width = quant_args.block_structure[1]
231+
block_column_idx = (i1 + i) // block_width
232+
q = fake_quantize(
233+
q.unsqueeze(1),
234+
scale[:, block_column_idx : block_column_idx + 1],
235+
zero_point[:, block_column_idx : block_column_idx + 1],
236+
quant_args,
237+
global_scale=global_scale,
238+
).squeeze(1)
229239
else:
230240
raise ValueError(
231241
f"Quantization strategy is not supported for GPTQ: {strategy}"
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import torch
2+
from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
3+
4+
from llmcompressor.modifiers.gptq.gptq_quantize import (
5+
make_empty_hessian,
6+
quantize_weight,
7+
)
8+
9+
10+
@torch.no_grad()
def test_quantize_weight_supports_block_strategy():
    """GPTQ's quantize_weight should run end to end with the block strategy."""
    # Small layer whose (5, 7) weight tiles into 2x4 blocks: 3 block rows, 2 block cols.
    linear = torch.nn.Linear(7, 5, bias=False)
    block_args = QuantizationArgs(
        num_bits=8,
        symmetric=True,
        strategy="block",
        block_structure=[2, 4],
    )
    linear.quantization_scheme = QuantizationScheme(
        targets=["Linear"], weights=block_args
    )

    # Shift the empty Hessian by the identity so the GPTQ solve is well conditioned.
    hess = make_empty_hessian(linear)
    hess = hess + torch.eye(hess.shape[0], dtype=hess.dtype, device=hess.device)

    err, qparams = quantize_weight(
        module=linear,
        quant_args=block_args,
        hessian=hess,
        blocksize=3,
    )

    # The quantized weight keeps its shape; scale/zero-point are one entry per block,
    # i.e. (ceil(5/2), ceil(7/4)) == (3, 2).
    assert err >= 0
    assert qparams["weight"].shape == linear.weight.shape
    assert qparams["weight_scale"].shape == (3, 2)
    assert qparams["weight_zero_point"].shape == (3, 2)
    # Block strategy performs no column reordering, so no g_idx is emitted.
    assert "weight_g_idx" not in qparams

0 commit comments

Comments
 (0)