Skip to content

Commit 7fcdfd6

Browse files
committed
Add GPTQ support for block quantization
1 parent 76ad054 commit 7fcdfd6

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

src/llmcompressor/modifiers/gptq/gptq_quantize.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ def quantize_weight(
         Hinv = H = torch.eye(num_columns, dtype=H.dtype, device=H.device)

     # See section 3.4 of https://arxiv.org/abs/2203.07259
+    block_width = (
+        quant_args.block_structure[1]
+        if strategy == QuantizationStrategy.BLOCK
+        else None
+    )
     for i1 in range(0, num_columns, blocksize):
         i2 = min(i1 + blocksize, num_columns)
         count = i2 - i1
@@ -227,7 +232,6 @@ def quantize_weight(
                 global_scale=global_scale,
             )
         elif strategy == QuantizationStrategy.BLOCK:
-            block_width = quant_args.block_structure[1]
             block_column_idx = (i1 + i) // block_width
             q = fake_quantize(
                 q.unsqueeze(1),

0 commit comments

Comments
 (0)