build_model.py

import os
from typing import Any, List, Optional

import torch
from langchain.llms.base import LLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from peft import PeftModel, PeftModelForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

from config.config import log


# Custom Qwen model wrapper implementing the LangChain LLM interface
class Qwen_LLM(LLM):
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None
    generation_config: GenerationConfig = None
    model_wrapper: PeftModelForCausalLM = None
    def __init__(self, model_path):
        super(Qwen_LLM, self).__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            device_map={"": 0},
        )
        # Load the math LoRA adapter as the default, then register the
        # physics adapter so _call can switch between them per request.
        self.model_wrapper = PeftModel.from_pretrained(
            self.model,
            os.environ.get("MATH_LORA_PATH"),
            adapter_name=os.environ.get("MATH_LORA_NAME"),
        )
        self.model_wrapper.load_adapter(
            os.environ.get("PHYSICS_LORA_PATH"),
            adapter_name=os.environ.get("PHYSICS_LORA_NAME"),
        )
        self.generation_config = GenerationConfig(
            temperature=0.05,
            top_p=0.7,
            do_sample=True,
            max_new_tokens=600,  # max_length = max_new_tokens + input length
            repetition_penalty=1.02,
            eos_token_id=self.tokenizer.eos_token_id,
        )
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        subject_id = kwargs.get("subject_id", -1)
        log.info("subject_id: " + str(subject_id))
        # Physics subject: activate the physics adapter loaded in __init__.
        # (The original re-called PeftModel.from_pretrained without assigning
        # the result, which had no effect; set_adapter is the intended switch.)
        if subject_id == 12:
            self.model_wrapper.set_adapter(os.environ.get("PHYSICS_LORA_NAME"))
        # Math subject: activate the math adapter
        elif subject_id == 3:
            self.model_wrapper.set_adapter(os.environ.get("MATH_LORA_NAME"))
        with torch.no_grad():
            ids = self.tokenizer.encode(prompt) + [self.tokenizer.eos_token_id]
            input_ids = torch.tensor([ids]).cuda()
            output = self.model_wrapper.generate(
                input_ids=input_ids,
                generation_config=self.generation_config,
            )
            # Decode only the newly generated tokens, dropping the prompt
            out_ids = output.cpu()[0][input_ids.size(1):]
            answer = self.tokenizer.decode(out_ids, skip_special_tokens=True)
        log.info(answer)
        return answer
    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "qwen2"

qwen2 = Qwen_LLM(os.environ.get("MODEL_PATH"))
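
# Usage sketch (illustrative, not part of the original file): LangChain
# forwards extra invoke() kwargs down to _call(), so callers can select the
# LoRA adapter per request via subject_id (3 = math, 12 = physics). The
# prompt below is a made-up example; MODEL_PATH and the *_LORA_* env vars
# are assumed to be set before import.
if __name__ == "__main__":
    answer = qwen2.invoke("Solve for x: 2x + 3 = 11", subject_id=3)
    print(answer)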