import os
from typing import Any, List, Optional

import torch
from langchain.llms.base import LLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from peft import PeftModel, PeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

from config.config import log


# Custom LangChain interface for the Qwen model, with per-subject LoRA adapters.
class Qwen_LLM(LLM):
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None
    generation_config: GenerationConfig = None
    model_wrapper: PeftModelForCausalLM = None

    def __init__(self, model_path):
        super(Qwen_LLM, self).__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path, torch_dtype=torch.float16, device_map={"": 0}
        )
        # Wrap the base model with the math adapter, then register the physics
        # adapter on the same PeftModel so both can be switched at inference time.
        self.model_wrapper = PeftModel.from_pretrained(
            self.model,
            os.environ.get("MATH_LORA_PATH"),
            adapter_name=os.environ.get("MATH_LORA_NAME"),
        )
        self.model_wrapper.load_adapter(
            os.environ.get("PHYSICS_LORA_PATH"),
            adapter_name=os.environ.get("PHYSICS_LORA_NAME"),
        )
        self.generation_config = GenerationConfig(
            temperature=0.05,
            top_p=0.7,
            do_sample=True,
            max_new_tokens=600,  # max_length = max_new_tokens + input sequence length
            repetition_penalty=1.02,
            eos_token_id=self.tokenizer.eos_token_id,
        )

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        subject_id = kwargs.get("subject_id", -1)
        log.info("subject_id: " + str(subject_id))
        # Switch the active adapter instead of reloading it on every call:
        # set_adapter() activates an adapter already registered in __init__.
        if subject_id == 12:  # physics
            self.model_wrapper.set_adapter(os.environ.get("PHYSICS_LORA_NAME"))
        elif subject_id == 3:  # math
            self.model_wrapper.set_adapter(os.environ.get("MATH_LORA_NAME"))
        with torch.no_grad():
            # Append eos_token_id to mark the end of the prompt.
            ids = self.tokenizer.encode(prompt) + [self.tokenizer.eos_token_id]
            input_ids = torch.tensor([ids]).cuda()
            output = self.model_wrapper.generate(
                input_ids=input_ids, generation_config=self.generation_config
            )
            # Drop the prompt tokens and decode only the newly generated text.
            out_ids = output.cpu()[0][input_ids.size(1):]
            answer = self.tokenizer.decode(out_ids, skip_special_tokens=True)
        log.info(answer)
        return answer

    @property
    def _llm_type(self) -> str:
        """Return the type of this LLM."""
        return "qwen2"


qwen2 = Qwen_LLM(os.environ.get("MODEL_PATH"))
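

# --- Usage sketch (illustrative, not part of the original module) ---
# Shows how the wrapper above is meant to be driven from LangChain. LangChain
# forwards extra keyword arguments through invoke() to _call(), so subject_id
# selects the active LoRA adapter (12 = physics, 3 = math, matching the
# branches in _call). The prompts below are made-up examples.
if __name__ == "__main__":
    physics_answer = qwen2.invoke("State Newton's second law.", subject_id=12)
    print(physics_answer)

    math_answer = qwen2.invoke("Solve x^2 - 5x + 6 = 0.", subject_id=3)
    print(math_answer)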