-
Notifications
You must be signed in to change notification settings - Fork 88
Description
When I use the lmquant-v0.0.0 branch to convert the llama-2-7b model, I get the following error:
Traceback (most recent call last):
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/llm/run.py", line 279, in <module>
run(config)
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/llm/run.py", line 158, in run
smooth_cache = smooth_llm(model, config.quant, tokenizer=tokenizer, calib_config=config.calib)
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/llm/quant/smooth.py", line 204, in smooth_llm
smooth_llm_decoder_layer(
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/llm/quant/smooth.py", line 63, in smooth_llm_decoder_layer
smooth_cache[cache_key] = smooth_attention(
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/quant/calib/smooth.py", line 181, in smooth_attention
scale = SmoothAttentionCalibrator(
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/quant/calib/calibrator/smooth.py", line 661, in calibrate
return super().calibrate(
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/quant/calib/calibrator/base/search.py", line 573, in calibrate
return self._calibrate_opts(
File "/home/lsy/workspace/gitcode/deepcompressor/lmquant/quant/calib/calibrator/base/search.py", line 1017, in _calibrate_opts
y = eval_mod(
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
return forward_call(*args, **kwargs)
File "/root/anaconda3/envs/TensorRT4/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
TypeError: LlamaAttention.forward() missing 1 required positional argument: 'position_embeddings'
The following are my installed pip packages:
Package Version
absl-py 2.1.0
accelerate 1.0.1
aenum 3.1.16
aiofiles 24.1.0
aiohappyeyeballs 2.4.3
aiohttp 3.10.10
aiosignal 1.3.1
altair 5.4.1
annotated-types 0.7.0
antlr4-python3-runtime 4.9.3
anyio 4.6.2.post1
async-timeout 4.0.3
attributedict 0.3.0
attrs 24.2.0
backoff 2.2.1
bitsandbytes 0.48.2
blake3 1.0.8
blessings 1.7
blobfile 3.1.0
braceexpand 0.1.7
build 1.3.0
cachetools 5.5.0
certifi 2024.8.30
cffi 2.0.0
chardet 5.2.0
charset-normalizer 3.4.0
click 8.1.7
click-option-group 0.5.9
codecov 2.1.13
colorama 0.4.6
colored 2.3.1
coloredlogs 15.0.1
colour-runner 0.1.1
contourpy 1.3.0
coverage 7.6.4
cuda-bindings 12.9.4
cuda-pathfinder 1.3.2
cuda-python 12.9.4
cycler 0.12.1
DataProperty 1.0.1
datasets 3.1.0
deepdiff 8.0.1
diffusers 0.35.2
dill 0.3.8
distlib 0.3.9
distro 1.9.0
docstring_parser 0.17.0
einops 0.8.0
etcd3 0.12.0
evaluate 0.4.6
exceptiongroup 1.2.2
fastapi 0.115.4
ffmpy 0.4.0
filelock 3.16.1
flash_attn 2.7.4.post1
flashinfer-python 0.2.5
fonttools 4.54.1
frozenlist 1.5.0
fsspec 2024.9.0
gradio 3.35.2
gradio_client 0.2.9
grpcio 1.76.0
h11 0.14.0
h5py 3.12.1
hf-xet 1.2.0
httpcore 1.0.6
httpx 0.27.2
huggingface-hub 0.36.0
humanfriendly 10.0
idna 3.10
importlib_metadata 8.7.0
inspecta 0.1.3
Jinja2 3.1.4
jiter 0.6.1
joblib 1.4.2
jsonlines 4.0.0
jsonschema 4.23.0
jsonschema-specifications 2024.10.1
kiwisolver 1.4.7
lark 1.3.1
linkify-it-py 2.0.3
llguidance 0.7.29
lm_eval 0.4.2
lxml 6.0.2
markdown-it-py 2.2.0
MarkupSafe 3.0.2
matplotlib 3.9.2
mbstrdecoder 1.1.3
mdit-py-plugins 0.3.3
mdurl 0.1.2
meson 1.9.1
ml_dtypes 0.5.4
more-itertools 10.8.0
mpi4py 4.1.1
mpmath 1.3.0
multidict 6.1.0
multiprocess 0.70.16
narwhals 1.10.0
networkx 3.4.2
ninja 1.11.1.1
nltk 3.9.1
numexpr 2.10.1
numpy 1.24.0
nvidia-cublas-cu12 12.6.4.1
nvidia-cuda-cupti-cu12 12.6.80
nvidia-cuda-nvrtc-cu12 12.6.77
nvidia-cuda-runtime-cu12 12.6.77
nvidia-cudnn-cu12 9.5.1.17
nvidia-cufft-cu12 11.3.0.4
nvidia-cufile-cu12 1.11.1.6
nvidia-curand-cu12 10.3.7.77
nvidia-cusolver-cu12 11.7.1.2
nvidia-cusparse-cu12 12.5.4.2
nvidia-cusparselt-cu12 0.6.3
nvidia-ml-py 12.575.51
nvidia-modelopt 0.33.1
nvidia-modelopt-core 0.33.1
nvidia-nccl-cu12 2.26.2
nvidia-nvjitlink-cu12 12.6.85
nvidia-nvtx-cu12 12.6.77
nvtx 0.2.13
omegaconf 2.3.0
omniconfig 0.1.10
onnx 1.19.1
onnx_graphsurgeon 0.5.8
openai 1.52.1
opencv-python-headless 4.11.0.86
optimum 2.0.0
ordered-set 4.1.0
orderly-set 5.2.2
orjson 3.10.10
packaging 24.1
pandas 2.2.3
pathvalidate 3.2.1
peft 0.18.0
pillow 10.3.0
pip 25.3
platformdirs 4.3.6
pluggy 1.5.0
polygraphy 0.49.26
portalocker 2.10.1
propcache 0.2.0
protobuf 5.28.3
psutil 6.1.0
PuLP 3.3.0
pyarrow 17.0.0
pybind11 2.13.6
pycountry 24.6.1
pycparser 2.23
pycryptodomex 3.23.0
pydantic 2.12.4
pydantic_core 2.41.5
pydantic-settings 2.12.0
pydub 0.25.1
Pygments 2.18.0
pynvml 12.0.0
pyparsing 3.2.0
pyproject-api 1.8.0
pyproject_hooks 1.2.0
pytablewriter 1.2.0
python-dateutil 2.9.0.post0
python-dotenv 1.2.1
python-multipart 0.0.12
pytz 2024.2
PyYAML 6.0.2
pyzmq 27.1.0
qserve_backend 0.1.0
referencing 0.35.1
regex 2024.9.11
requests 2.32.3
rich 14.2.0
rootpath 0.1.1
rouge-score 0.1.2
rpds-py 0.20.0
sacrebleu 1.5.0
safetensors 0.4.5
scikit-learn 1.5.2
scipy 1.14.1
semantic-version 2.10.0
sentencepiece 0.2.0
setuptools 79.0.1
six 1.16.0
sniffio 1.3.1
soundfile 0.13.1
sqlitedict 2.1.0
starlette 0.41.0
StrEnum 0.4.15
sympy 1.13.3
tabledata 1.3.3
tcolorpy 0.1.6
tenacity 9.1.2
tensorrt 10.11.0.33
tensorrt_cu12 10.11.0.33
tensorrt_cu12_bindings 10.11.0.33
tensorrt_cu12_libs 10.11.0.33
tensorrt_llm 1.0.0
termcolor 2.5.0
texttable 1.7.0
threadpoolctl 3.5.0
tiktoken 0.12.0
tokenizers 0.21.4
toml 0.10.2
tomli 2.0.2
torch 2.7.1
torchaudio 2.1.1
torchprofile 0.0.4
torchvision 0.22.1
tox 4.23.2
tqdm 4.66.5
tqdm-multiprocess 0.0.11
transformers 4.53.1
triton 3.3.1
typepy 1.3.2
typing_extensions 4.15.0
typing-inspection 0.4.2
tzdata 2024.2
uc-micro-py 1.0.3
urllib3 2.2.3
uvicorn 0.32.0
virtualenv 20.27.0
webdataset 0.2.100
websockets 12.0
wheel 0.44.0
word2number 1.1
xformers 0.0.23
xgrammar 0.1.21
xxhash 3.5.0
yarl 1.16.0
zipp 3.23.0
zstandard 0.23.0