Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories.

base repository: InternLM/lmdeploy
base: bf616820b5e1fd39c5f2d6d8691ef3317b9dba81
head repository: InternLM/lmdeploy
compare: 20c7476594ede66fa48ef4377ca73932a322a59c

examples/python/README.md (1 addition & 1 deletion)

@@ -12,4 +12,4 @@
 
 `python3 offline_vl.py models/llava-interleave-qwen-7b-hf`
 
-`python3 offline_vl.py models/llava-interleave-qwen-7b-hf/awq --model-format awq`
+`python3 offline_vl.py models/llava-interleave-qwen-7b-hf/awq --model-format awq`

(The deleted and added lines are textually identical; the change is presumably adding a missing trailing newline at the end of the file.)
examples/python/offline_vl.py (23 additions & 25 deletions)

@@ -1,36 +1,34 @@
-from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
-from lmdeploy.vl import load_image
 import argparse
 
-if __name__ == "__main__":
+from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
+from lmdeploy.vl import load_image
+
+if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='test model')
     parser.add_argument('model_path',
                         type=str,
                         help='the path of the model in localhost or '
                         'the repo_id of the model in huggingface.co',
-                        default="llava-hf/llava-interleave-qwen-7b-hf")
-    parser.add_argument(
-        '--model-format',
-        type=str,
-        help='model format',
-        default='hf',
-        choices=['hf', 'awq'])
-    parser.add_argument(
-        '--max-new-tokens',
-        type=int,
-        help='output max tokens number',
-        default=128)
+                        default='llava-hf/llava-interleave-qwen-7b-hf')
+    parser.add_argument('--model-format',
+                        type=str,
+                        help='model format',
+                        default='hf',
+                        choices=['hf', 'awq'])
+    parser.add_argument('--max-new-tokens',
+                        type=int,
+                        help='output max tokens number',
+                        default=128)
     args = parser.parse_args()
-    pipe = pipeline(args.model_path,
-                    backend_config=TurbomindEngineConfig(
-                        cache_max_entry_count=0.5,
-                        model_format=args.model_format),
-                    gen_config=GenerationConfig(
-                        max_new_tokens=args.max_new_tokens))
+    pipe = pipeline(
+        args.model_path,
+        backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5,
+                                             model_format=args.model_format),
+        gen_config=GenerationConfig(max_new_tokens=args.max_new_tokens))
 
-    image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/\
-Qwen-VL/assets/demo.jpeg')
-    for prompt in ["Describe the image.", "How many people in the image?"]:
-        print(f"prompt:{prompt}")
+    image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/' +
+                       'Qwen-VL/assets/demo.jpeg')
+    for prompt in ['Describe the image.', 'How many people in the image?']:
+        print(f'prompt:{prompt}')
         response = pipe((prompt, image))
         print(response)
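
The refactored script above is itself the canonical single-image usage. As a usage note, the same pipeline object can also serve several requests in one call; a minimal sketch, assuming (not shown in this diff) that the VL pipeline accepts a list of (prompt, image) tuples as its batched form and that each response exposes a .text field:

from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
from lmdeploy.vl import load_image

# Mirrors the construction in offline_vl.py above.
pipe = pipeline('llava-hf/llava-interleave-qwen-7b-hf',
                backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5),
                gen_config=GenerationConfig(max_new_tokens=128))
image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/' +
                   'Qwen-VL/assets/demo.jpeg')

# Assumption: batched calls take a list of (prompt, image) tuples.
responses = pipe([('Describe the image.', image),
                  ('How many people in the image?', image)])
for response in responses:
    print(response.text)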

lmdeploy/turbomind/deploy/source_model/__init__.py (1 addition & 1 deletion)

@@ -5,9 +5,9 @@
 from .internlm2 import InternLM2Model  # noqa: F401
 from .internvl import InternVLModel  # noqa: F401
 from .llama import LlamaModel  # noqa: F401
+from .llava_qwen2 import LlavaQwen2Model  # noqa: F401
 from .meta_llama import MetaLlamaModel  # noqa: F401
 from .minicpmv import MiniCPMVModel  # noqa: F401
 from .mixtral import MixtralModel  # noqa: F401
 from .qwen import QwenModel  # noqa: F401
 from .xcomposer2 import Xcomposer2Model  # noqa: F401
-from .llava_qwen2 import LlavaQwen2Model  # noqa: F401
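
This change only moves the llava_qwen2 import into alphabetical order, but the import itself is load-bearing: importing the module executes its @INPUT_MODELS.register_module decorator (visible in the llava_qwen2.py diff below), which is what makes the reader discoverable by name. A minimal sketch of how a new source model would plug in; MyModel and the 'my_model' key are hypothetical names for illustration:

# hypothetical lmdeploy/turbomind/deploy/source_model/my_model.py
from .base import INPUT_MODELS
from .llama import LlamaModel


@INPUT_MODELS.register_module(name='my_model')
class MyModel(LlamaModel):
    """Hypothetical model reusing the llama weight layout unchanged."""

The new module would then be imported in this __init__.py (with the same # noqa: F401 marker, since nothing references the name directly) so the registration side effect runs at package import time.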

lmdeploy/turbomind/deploy/source_model/llava_qwen2.py (4 additions & 4 deletions)

@@ -3,7 +3,7 @@
 import os.path as osp
 
 from .base import INPUT_MODELS
-from .llama import LlamaReader, LlamaModel
+from .llama import LlamaModel, LlamaReader
 
 
 class LlavaQwen2Reader(LlamaReader):
@@ -18,8 +18,8 @@ class LlavaQwen2Reader(LlamaReader):
     def __init__(self, new_params: dict, unused_params: dict, last_bin: bool,
                  model_cfg: dict, policy):
         model_cfg = model_cfg.get('text_config')
-        super().__init__(new_params, unused_params, last_bin,
-                         model_cfg, policy)
+        super().__init__(new_params, unused_params, last_bin, model_cfg,
+                         policy)
 
 
 @INPUT_MODELS.register_module(name='llava_qwen2')
@@ -52,7 +52,7 @@ def model_info(self):
         # special for the model: llava-hf/llava-interleave-qwen-7b-hf
         hidden_units = model_arg.get('hidden_size', 4096)
         vocab_size = model_arg.get('vocab_size', 152000)
-        intermediate_size = model_arg.get("intermediate_size", 11008)
+        intermediate_size = model_arg.get('intermediate_size', 11008)
         attn_bias = int(model_arg.get('attn_bias', 1))
         use_logn_attn = int(model_arg.get('use_logn_attn', 0))
 
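The model_cfg.get('text_config') line in the reader reflects how llava-hf checkpoints nest the language model's settings inside the top-level config.json. A sketch of the shape being handled; the field values simply mirror the defaults used in model_info() above, not a real checkpoint dump:

# Illustrative llava-hf style config: LM fields sit under 'text_config'.
model_cfg = {
    'architectures': ['LlavaForConditionalGeneration'],
    'text_config': {
        'hidden_size': 4096,
        'vocab_size': 152000,
        'intermediate_size': 11008,
    },
}

text_cfg = model_cfg.get('text_config')  # what LlavaQwen2Reader.__init__ does
assert text_cfg['vocab_size'] == 152000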

lmdeploy/turbomind/generate_gemm_config.py (4 additions & 4 deletions)

@@ -60,13 +60,13 @@ def main(head_num: int = 32,
         inter_size = config.intermediate_size
         vocab_size = config.vocab_size
     except AttributeError as e:
-        if hasattr(config, "text_config"):
+        if hasattr(config, 'text_config'):
             config = config.text_config
-        elif hasattr(config, "llm_config"):
+        elif hasattr(config, 'llm_config'):
             config = config.llm_config
         else:
-            raise AttributeError(f"not found attribute in {config},\
-                please check your model config file.{e}")
+            raise AttributeError(f'not found attribute in {config},\
+                please check your model config file.{e}')
         head_num = config.num_attention_heads
         size_per_head = config.hidden_size // head_num
         inter_size = config.intermediate_size
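
The logic touched here is a nested-config fallback: read the flat attributes first and, on AttributeError, descend into text_config (llava-style checkpoints) or llm_config (e.g. internvl-style) before retrying. A standalone sketch of the same pattern, using a SimpleNamespace stand-in for the transformers config object:

from types import SimpleNamespace

# Stand-in for a VLM config that nests the LM fields under 'text_config'.
config = SimpleNamespace(
    text_config=SimpleNamespace(num_attention_heads=32,
                                hidden_size=4096,
                                intermediate_size=11008,
                                vocab_size=152000))

try:
    head_num = config.num_attention_heads
except AttributeError:
    if hasattr(config, 'text_config'):
        config = config.text_config
    elif hasattr(config, 'llm_config'):
        config = config.llm_config
    head_num = config.num_attention_heads

size_per_head = config.hidden_size // head_num  # 4096 // 32 == 128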

lmdeploy/turbomind/supported_models.py (1 addition & 1 deletion)

@@ -26,7 +26,7 @@
         LlavaLlamaForCausalLM='llama',
         LlavaMistralForCausalLM='llama',
         # Llava_interleave
-        LlavaForConditionalGeneration="llava_qwen2",
+        LlavaForConditionalGeneration='llava_qwen2',
         # xcomposer2
         InternLMXComposer2ForCausalLM='xcomposer2',
         # internvl
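
The value 'llava_qwen2' here must match the name passed to INPUT_MODELS.register_module in llava_qwen2.py: the architecture string read from a checkpoint's config.json is looked up in this mapping to pick the source-model reader. A hedged sketch of that lookup; the dict is abridged to the entries visible in this diff, and ARCH_TO_SOURCE_MODEL is an invented local name, not the variable used in supported_models.py:

# Abridged from the mapping shown in the diff above.
ARCH_TO_SOURCE_MODEL = {
    'LlavaLlamaForCausalLM': 'llama',
    'LlavaMistralForCausalLM': 'llama',
    'LlavaForConditionalGeneration': 'llava_qwen2',
    'InternLMXComposer2ForCausalLM': 'xcomposer2',
}

arch = 'LlavaForConditionalGeneration'  # from the checkpoint's config.json
print(ARCH_TO_SOURCE_MODEL[arch])  # -> 'llava_qwen2'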