1 file changed: +5 −1 lines changed
src/llmcompressor/modifiers/gptq

@@ -174,6 +174,11 @@ def quantize_weight(
 174 174         Hinv = H = torch.eye(num_columns, dtype=H.dtype, device=H.device)
 175 175
 176 176     # See section 3.4 of https://arxiv.org/abs/2203.07259
     177 +   block_width = (
     178 +       quant_args.block_structure[1]
     179 +       if strategy == QuantizationStrategy.BLOCK
     180 +       else None
     181 +   )
 177 182     for i1 in range(0, num_columns, blocksize):
 178 183         i2 = min(i1 + blocksize, num_columns)
 179 184         count = i2 - i1
@@ -227,7 +232,6 @@ def quantize_weight(
 227 232                     global_scale=global_scale,
 228 233                 )
 229 234             elif strategy == QuantizationStrategy.BLOCK:
 230     -               block_width = quant_args.block_structure[1]
 231 235                 block_column_idx = (i1 + i) // block_width
 232 236                 q = fake_quantize(
 233 237                     q.unsqueeze(1),
You can’t perform that action at this time.
0 commit comments