import os
from typing import Any, List, Optional

import torch
from langchain.llms.base import LLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from peft import PeftModel, PeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

from config.config import log


# Custom Qwen model wrapper exposing a LangChain LLM interface
class Qwen_LLM(LLM):
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None
    generation_config: GenerationConfig = None
    model_wrapper: PeftModelForCausalLM = None

    def __init__(self, model_path):
        super(Qwen_LLM, self).__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            device_map={"": 0},
        )
        # Load the math adapter as the initial adapter, then register the physics adapter
        self.model_wrapper = PeftModel.from_pretrained(
            self.model,
            os.environ.get("MATH_LORA_PATH"),
            adapter_name=os.environ.get("MATH_LORA_NAME"),
        )
        self.model_wrapper.load_adapter(
            os.environ.get("PHYSICS_LORA_PATH"),
            adapter_name=os.environ.get("PHYSICS_LORA_NAME"),
        )
        self.generation_config = GenerationConfig(
            temperature=0.05,
            top_p=0.7,
            do_sample=True,
            max_new_tokens=600,  # max_length = max_new_tokens + input sequence length
            repetition_penalty=1.02,
            eos_token_id=self.tokenizer.eos_token_id,
        )
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        subject_id = kwargs.get("subject_id", -1)
        log.info("subject_id: " + str(subject_id))
        # Both adapters are loaded in __init__, so switching subjects only
        # requires activating the corresponding adapter on the shared wrapper
        if subject_id == 12:  # physics
            self.model_wrapper.set_adapter(os.environ.get("PHYSICS_LORA_NAME"))
        if subject_id == 3:  # math
            self.model_wrapper.set_adapter(os.environ.get("MATH_LORA_NAME"))
        with torch.no_grad():
            # Encode the prompt and append the EOS token as the end-of-prompt marker
            ids = self.tokenizer.encode(prompt) + [self.tokenizer.eos_token_id]
            input_ids = torch.tensor([ids]).cuda()
            output = self.model_wrapper.generate(
                input_ids=input_ids,
                generation_config=self.generation_config,
            )
            # Drop the prompt tokens and decode only the newly generated ones
            out_ids = output.cpu()[0][input_ids.size(1):]
            answer = self.tokenizer.decode(out_ids, skip_special_tokens=True)
        log.info(answer)
        return answer
    @property
    def _llm_type(self) -> str:
        """Return type of llm"""
        return "qwen2"


qwen2 = Qwen_LLM(os.environ.get("MODEL_PATH"))
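
# A minimal usage sketch, not part of the original module: it assumes MODEL_PATH and
# the LoRA environment variables above are set, and that extra keyword arguments
# passed to invoke() are forwarded by LangChain's LLM base class down to _call, so
# subject_id selects which LoRA adapter answers the prompt. The prompts below are
# hypothetical examples.
if __name__ == "__main__":
    # subject_id=12 activates the physics adapter, subject_id=3 the math adapter
    physics_answer = qwen2.invoke(
        "A ball is dropped from a height of 20 m. How long does it take to reach the ground?",
        subject_id=12,
    )
    math_answer = qwen2.invoke("Differentiate f(x) = x^2 + 3x.", subject_id=3)
    log.info(physics_answer)
    log.info(math_answer)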